hbase

heirloom base
git clone git://git.2f30.org/hbase
Log | Files | Refs | README

commit d672d713160ed2c05d1d631d809ba17cb5a806ae
Author: Daniel Bainton <dpb@driftaway.org>
Date:   Wed, 26 Mar 2014 14:44:38 +0000

Initial commit

Diffstat:
ALICENSE/COPYING | 340+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ALICENSE/COPYING.LGPL | 504+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ALICENSE/LICENSE | 354+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ALICENSE/LUCENT | 258+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ALICENSE/OPENSOLARIS.LICENSE | 385+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ALICENSE/README | 27+++++++++++++++++++++++++++
AREADME | 5+++++
A_install/install.1b | 113+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A_install/install.c | 436+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A_install/mkfile | 12++++++++++++
Abc/bc.1 | 222+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abc/bc.y | 743+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abc/lib.b | 241+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abc/mkfile | 11+++++++++++
Abc/yyval.sed | 22++++++++++++++++++++++
Acp/cp.1 | 218+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acp/cp.c | 1264+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acp/ln.1 | 113+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acp/mkfile | 9+++++++++
Acp/mv.1 | 179+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adc/dc.1 | 231+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adc/dc.c | 2061+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adc/dc.h | 203+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adc/mkfile | 8++++++++
Adc/version.c | 13+++++++++++++
Add/dd.1 | 293+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Add/dd.c | 1035+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Add/mkfile | 7+++++++
Adiff/diff.1 | 493+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adiff/diff.c | 473+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adiff/diff.h | 211+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adiff/diffdir.c | 993+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adiff/diffh.c | 410+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adiff/diffreg.c | 1629+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adiff/diffver.c | 15+++++++++++++++
Adiff/mkfile | 14++++++++++++++
Aed/depsinc.mk | 1+
Aed/ed.1 | 1033+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aed/ed.c | 2822+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aed/mkfile | 8++++++++
Aexpr/expr.1 | 211+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aexpr/expr.y | 546+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aexpr/mkfile | 10++++++++++
Afind/find.1 | 558+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Afind/find.c | 1554+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Afind/mkfile | 8++++++++
Afmt/fmt.1 | 115+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Afmt/fmt.c | 678+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Afmt/mkfile | 7+++++++
Agrep/ac.c | 578++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/alloc.c | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/alloc.h | 34++++++++++++++++++++++++++++++++++
Agrep/config.h | 4++++
Agrep/egrep.1 | 388+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/fgrep.1 | 179+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/grep.1 | 297+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/grep.c | 727+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/grep.h | 146+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/grid.c | 50++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/mkfile | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/plist.c | 213+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/rcomp.c | 350+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/sus.c | 133+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahd/hd.1 | 160+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahd/hd.c | 715+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahd/mkfile | 7+++++++
Alex/allprint.c | 94+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/depsinc.mk | 1+
Alex/getopt.c | 222+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/header.c | 409+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/ldefs.c | 309+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/lex.1 | 131+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/libmain.c | 48++++++++++++++++++++++++++++++++++++++++++++++++
Alex/lsearch.c | 71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/main.c | 364+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/mkfile | 26++++++++++++++++++++++++++
Alex/nceucform | 480+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/ncform | 290+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/nrform | 188+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/once.h | 166+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/parser.y | 978+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/reject.c | 158+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/search.h | 48++++++++++++++++++++++++++++++++++++++++++++++++
Alex/sgs.h | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/sub1.c | 1017+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/sub2.c | 1217+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/sub3.c | 395+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/wcio.c | 70++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/yyless.c | 137+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/yywrap.c | 41+++++++++++++++++++++++++++++++++++++++++
Alibcommon/CHECK.c | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/_alloca.h | 27+++++++++++++++++++++++++++
Alibcommon/_malloc.h | 26++++++++++++++++++++++++++
Alibcommon/_utmpx.h | 89+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/asciitype.c | 59+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/asciitype.h | 60++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/atoll.h | 8++++++++
Alibcommon/blank.h | 38++++++++++++++++++++++++++++++++++++++
Alibcommon/depsinc.mk | 2++
Alibcommon/getdir.c | 197+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/getdir.h | 33+++++++++++++++++++++++++++++++++
Alibcommon/getopt.c | 141+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/gmatch.c | 136+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/ib_alloc.c | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/ib_close.c | 36++++++++++++++++++++++++++++++++++++
Alibcommon/ib_free.c | 33+++++++++++++++++++++++++++++++++
Alibcommon/ib_getlin.c | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/ib_getw.c | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/ib_open.c | 48++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/ib_popen.c | 87+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/ib_read.c | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/ib_seek.c | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/iblok.h | 135+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/mbtowi.h | 22++++++++++++++++++++++
Alibcommon/memalign.c | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/memalign.h | 35+++++++++++++++++++++++++++++++++++
Alibcommon/mkfile | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/msgselect.h | 30++++++++++++++++++++++++++++++
Alibcommon/oblok.c | 260+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/oblok.h | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/pathconf.c | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/pathconf.h | 29+++++++++++++++++++++++++++++
Alibcommon/pfmt.c | 39+++++++++++++++++++++++++++++++++++++++
Alibcommon/pfmt.h | 46++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/pfmt_label.c | 1+
Alibcommon/regexp.h | 1211+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/regexpr.c | 90+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/regexpr.h | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/setlabel.c | 40++++++++++++++++++++++++++++++++++++++++
Alibcommon/setuxlabel.c | 47+++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/sfile.c | 99+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/sfile.h | 40++++++++++++++++++++++++++++++++++++++++
Alibcommon/sighold.c | 41+++++++++++++++++++++++++++++++++++++++++
Alibcommon/sigignore.c | 45+++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/signal.c | 45+++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/sigpause.c | 48++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/sigrelse.c | 41+++++++++++++++++++++++++++++++++++++++++
Alibcommon/sigset.c | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/sigset.h | 38++++++++++++++++++++++++++++++++++++++
Alibcommon/strtol.c | 117+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/sysv3.c | 2++
Alibcommon/utmpx.c | 252+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/vpfmt.c | 90+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/COPYING.LGPL | 504+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/NOTES | 14++++++++++++++
Alibuxre/_collelem.c | 119+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/_collmult.c | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/bracket.c | 829+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/colldata.h | 226+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/depsinc.mk | 2++
Alibuxre/mkfile | 19+++++++++++++++++++
Alibuxre/onefile.c | 38++++++++++++++++++++++++++++++++++++++
Alibuxre/re.h | 228+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regcomp.c | 77+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regdfa.c | 877+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regdfa.h | 75+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regerror.c | 95+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regex.h | 153+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regexec.c | 68++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regfree.c | 42++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regnfa.c | 1070+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regparse.c | 1091+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/stubs.c | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/wcharm.h | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/NOTICE | 34++++++++++++++++++++++++++++++++++
Amk/libbio/README | 5+++++
Amk/libbio/bbuffered.c | 20++++++++++++++++++++
Amk/libbio/bcat.c | 46++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/bfildes.c | 9+++++++++
Amk/libbio/bflush.c | 33+++++++++++++++++++++++++++++++++
Amk/libbio/bgetc.c | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/bgetd.c | 36++++++++++++++++++++++++++++++++++++
Amk/libbio/bgetrune.c | 47+++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/binit.c | 153+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/bio.3 | 371+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/bio.h | 91+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/boffset.c | 25+++++++++++++++++++++++++
Amk/libbio/bprint.c | 14++++++++++++++
Amk/libbio/bputc.c | 20++++++++++++++++++++
Amk/libbio/bputrune.c | 23+++++++++++++++++++++++
Amk/libbio/brdline.c | 94+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/brdstr.c | 111+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/bread.c | 45+++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/bseek.c | 60++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/bvprint.c | 38++++++++++++++++++++++++++++++++++++++
Amk/libbio/bwrite.c | 38++++++++++++++++++++++++++++++++++++++
Amk/libbio/depsinc.mk | 2++
Amk/libbio/lib9.h | 26++++++++++++++++++++++++++
Amk/libbio/mkfile | 23+++++++++++++++++++++++
Amk/libfmt/NOTICE | 25+++++++++++++++++++++++++
Amk/libfmt/README | 5+++++
Amk/libfmt/charstod.c | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/depsinc.mk | 2++
Amk/libfmt/dofmt.c | 617+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/dorfmt.c | 50++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/errfmt.c | 16++++++++++++++++
Amk/libfmt/fltfmt.c | 668+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmt.c | 220+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmt.h | 116+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmtdef.h | 105+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmtfd.c | 36++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmtfdflush.c | 22++++++++++++++++++++++
Amk/libfmt/fmtinstall.3 | 379+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmtlocale.c | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmtlock.c | 15+++++++++++++++
Amk/libfmt/fmtnull.c | 33+++++++++++++++++++++++++++++++++
Amk/libfmt/fmtprint.c | 36++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmtquote.c | 259+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmtrune.c | 28++++++++++++++++++++++++++++
Amk/libfmt/fmtstr.c | 16++++++++++++++++
Amk/libfmt/fmtvprint.c | 37+++++++++++++++++++++++++++++++++++++
Amk/libfmt/fprint.c | 17+++++++++++++++++
Amk/libfmt/mkfile | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/nan.h | 4++++
Amk/libfmt/nan64.c | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/plan9.h | 38++++++++++++++++++++++++++++++++++++++
Amk/libfmt/pow10.c | 45+++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/print.3 | 482+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/print.c | 17+++++++++++++++++
Amk/libfmt/runefmtstr.c | 16++++++++++++++++
Amk/libfmt/runeseprint.c | 18++++++++++++++++++
Amk/libfmt/runesmprint.c | 18++++++++++++++++++
Amk/libfmt/runesnprint.c | 19+++++++++++++++++++
Amk/libfmt/runesprint.c | 18++++++++++++++++++
Amk/libfmt/runevseprint.c | 29+++++++++++++++++++++++++++++
Amk/libfmt/runevsmprint.c | 86+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/runevsnprint.c | 28++++++++++++++++++++++++++++
Amk/libfmt/seprint.c | 17+++++++++++++++++
Amk/libfmt/smprint.c | 17+++++++++++++++++
Amk/libfmt/snprint.c | 18++++++++++++++++++
Amk/libfmt/sprint.c | 30++++++++++++++++++++++++++++++
Amk/libfmt/strtod.c | 520+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/test.c | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/test2.c | 9+++++++++
Amk/libfmt/test3.c | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/vfprint.c | 21+++++++++++++++++++++
Amk/libfmt/vseprint.c | 28++++++++++++++++++++++++++++
Amk/libfmt/vsmprint.c | 83+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/vsnprint.c | 28++++++++++++++++++++++++++++
Amk/libregexp/NOTICE | 25+++++++++++++++++++++++++
Amk/libregexp/README | 5+++++
Amk/libregexp/depsinc.mk | 2++
Amk/libregexp/lib9.h | 10++++++++++
Amk/libregexp/mkfile | 15+++++++++++++++
Amk/libregexp/regaux.c | 112+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/regcomp.c | 555+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/regcomp.h | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/regerror.c | 14++++++++++++++
Amk/libregexp/regexec.c | 231+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/regexp9.3 | 220+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/regexp9.7 | 141+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/regexp9.h | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/regsub.c | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/rregexec.c | 212+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/rregsub.c | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/test.c | 46++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/test2.c | 20++++++++++++++++++++
Amk/libutf/NOTICE | 25+++++++++++++++++++++++++
Amk/libutf/README | 5+++++
Amk/libutf/depsinc.mk | 2++
Amk/libutf/isalpharune.3 | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/mkfile | 27+++++++++++++++++++++++++++
Amk/libutf/plan9.h | 29+++++++++++++++++++++++++++++
Amk/libutf/rune.3 | 194+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/rune.c | 217+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/runestrcat.3 | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/runestrcat.c | 25+++++++++++++++++++++++++
Amk/libutf/runestrchr.c | 35+++++++++++++++++++++++++++++++++++
Amk/libutf/runestrcmp.c | 35+++++++++++++++++++++++++++++++++++
Amk/libutf/runestrcpy.c | 28++++++++++++++++++++++++++++
Amk/libutf/runestrdup.c | 30++++++++++++++++++++++++++++++
Amk/libutf/runestrecpy.c | 32++++++++++++++++++++++++++++++++
Amk/libutf/runestrlen.c | 24++++++++++++++++++++++++
Amk/libutf/runestrncat.c | 32++++++++++++++++++++++++++++++++
Amk/libutf/runestrncmp.c | 37+++++++++++++++++++++++++++++++++++++
Amk/libutf/runestrncpy.c | 33+++++++++++++++++++++++++++++++++
Amk/libutf/runestrrchr.c | 30++++++++++++++++++++++++++++++
Amk/libutf/runestrstr.c | 44++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/runetype.c | 1151+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/utf.7 | 99+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/utf.h | 54++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/utfdef.h | 33+++++++++++++++++++++++++++++++++
Amk/libutf/utfecpy.c | 37+++++++++++++++++++++++++++++++++++++
Amk/libutf/utflen.c | 37+++++++++++++++++++++++++++++++++++++
Amk/libutf/utfnlen.c | 41+++++++++++++++++++++++++++++++++++++++++
Amk/libutf/utfrrune.c | 45+++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/utfrune.c | 44++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/utfutf.c | 41+++++++++++++++++++++++++++++++++++++++++
Amk/mk/NOTICE | 34++++++++++++++++++++++++++++++++++
Amk/mk/README | 5+++++
Amk/mk/arc.c | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/archive.c | 253+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/bufblock.c | 88+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/env.c | 149+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/file.c | 90+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/fns.h | 88+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/graph.c | 279+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/job.c | 33+++++++++++++++++++++++++++++++++
Amk/mk/lex.c | 146+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/main.c | 287+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/match.c | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/mk.1 | 693+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/mk.c | 234+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/mk.h | 185+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/mkfile | 35+++++++++++++++++++++++++++++++++++
Amk/mk/parse.c | 318+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/rc.c | 194+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/recipe.c | 117+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/rule.c | 112+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/run.c | 296+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/sh.c | 206+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/shell.c | 80+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/shprint.c | 125+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/symtab.c | 97+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/sys.h | 27+++++++++++++++++++++++++++
Amk/mk/unix.c | 341+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/var.c | 41+++++++++++++++++++++++++++++++++++++++++
Amk/mk/varsub.c | 252+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/word.c | 189+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mkfile | 4++++
Amkfile | 5+++++
Anawk/COPYING | 340+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/NOTES | 20++++++++++++++++++++
Anawk/awk.g.y | 468+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/awk.h | 387+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/awk.lx.l | 383+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/b.c | 174+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/lib.c | 852+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/main.c | 215+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/maketab.c | 177+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/mkfile | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/nawk.1 | 585+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/parse.c | 248+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/run.c | 1962+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/tran.c | 483+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/version.c | 25+++++++++++++++++++++++++
Aod/mkfile | 8++++++++
Aod/od.1 | 291++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aod/od.c | 1078+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/backupfile.c | 246+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/backupfile.h | 39+++++++++++++++++++++++++++++++++++++++
Apatch/common.h | 119+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/inp.c | 485+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/inp.h | 32++++++++++++++++++++++++++++++++
Apatch/mkfile | 7+++++++
Apatch/mkpath.c | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/patch.1 | 700+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/patch.c | 1074+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/pathnames.h | 12++++++++++++
Apatch/pch.c | 1596+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/pch.h | 56++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/util.c | 432+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/util.h | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Apgrep/mkfile | 11+++++++++++
Apgrep/pgrep.1 | 258+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apgrep/pgrep.c | 1748+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aprintf/mkfile | 7+++++++
Aprintf/printf.1 | 254+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aprintf/printf.c | 402+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aps/NOTES | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aps/mkfile | 8++++++++
Aps/ps.1 | 488+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aps/ps.1b | 421+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aps/ps.c | 5043+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aps/ps.dfl | 39+++++++++++++++++++++++++++++++++++++++
Ased/mkfile | 7+++++++
Ased/sed.1 | 369+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/sed.h | 191+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/sed0.c | 1266+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/sed1.c | 917+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/version.c | 22++++++++++++++++++++++
Astty/mkfile | 8++++++++
Astty/stty.1 | 293+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astty/stty.1b | 345+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astty/stty.c | 1490+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atar/NOTES | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atar/mkfile | 9+++++++++
Atar/tar.1 | 473+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atar/tar.c | 3204+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atar/tar.dfl | 9+++++++++
Ayacc/depsinc.mk | 1+
Ayacc/dextern | 319+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/getopt.c | 222+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/libmai.c | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/libzer.c | 45+++++++++++++++++++++++++++++++++++++++++++++
Ayacc/mkfile | 15+++++++++++++++
Ayacc/sgs.h | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/y1.c | 1098+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/y2.c | 1758+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/y3.c | 568+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/y4.c | 485+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/y5.c | 70++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/yacc.1 | 169+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/yaccpar | 565+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
394 files changed, 93648 insertions(+), 0 deletions(-)

diff --git a/LICENSE/COPYING b/LICENSE/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/LICENSE/COPYING.LGPL b/LICENSE/COPYING.LGPL @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. 
Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. 
Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. 
+ + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. 
(Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. 
+ + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. 
+ +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". 
Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. 
+ + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. 
+ + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. 
You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. 
For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. 
+Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/LICENSE/LICENSE b/LICENSE/LICENSE @@ -0,0 +1,354 @@ +******************************************************************************** +The license for newly written code and changes to existing code is: + + Copyright (c) 2003 Gunnar Ritter + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute + it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source distribution. 
+ +The following tools have been rewritten from scratch: + + basename cat chown cmp copy cp + cpio* csplit cut date dd df dirname + du env expand false fold groups hd + head hostname id install kill line + listusers logins logname man mesg mkdir + mkfifo mknod mt newform news nice nl + nohup od paste pathchk pg pgrep printenv + printf priocntl ps psrinfo pwd rm + rmdir sdiff setpgrp shl sleep split stty + su sync tabs tape tapecntl tee + time touch true tty uname unexpand + users wc who whoami whodo xargs yes + + * See below for the licenses on compression codes. + +All source code and documentation has been changed intensively, thus +the above license applies to all material distributed here, further +restricted by the original licenses. + +******************************************************************************** +Caldera's License for Unix 6th Edition, Unix 7th Edition, and Unix 32V +applies to nearly all manual pages and to the utilities based on these +Unix versions: + + Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + Redistributions of source code and documentation must retain the + above copyright notice, this list of conditions and the following + disclaimer. + Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + All advertising materials mentioning features or use of this software + must display the following acknowledgement: + This product includes software developed or owned by Caldera + International, Inc. + Neither the name of Caldera International, Inc. nor the names of + other contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +These utilities have been derived from Ancient Unix code: + + banner bc cal calendar chmod cksum + col comm dc deroff(1b) diff diff3 + ed egrep expr factor fgrep file find + grep join mkdir oawk pr random sed + sort sum tail tar tr tsort uniq + unit + +as well as the 'gmatch' and 'regexp' parts of libcommon. + +******************************************************************************** +Some utilities and manual pages are based on various releases of +4BSD, governed by the following license: + + Copyright (c) 1980, 1993 + The Regents of the University of California. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. 
All advertising materials mentioning features or use of this software + must display the following acknowledgement: + This product includes software developed by the University of + California, Berkeley and its contributors. + 4. Neither the name of the University nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS + BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The following utilities include 4BSD code: + + bc ching dc deroff(1b) diff diff3 fmt + ln(1b) more nawk oawk renice tcopy ul + +******************************************************************************** +The following utilities are based on Sun's OpenSolaris code; the file +OPENSOLARIS.LICENSE contains the licensing conditions for them: + + bdiff bfs dircmp echo fmtmsg getconf getopt mail + mvdir spell test what + +Some manual pages have also been derived from OpenSolaris code; see +the header of the respective page. +Changes to these programs are also subject to the original license. 
+******************************************************************************** +One utility is based on the MINIX 2.0 sources, to which the +following license applies: + + Copyright (c) 1987,1997, Prentice Hall All rights reserved. + + Redistribution and use of the MINIX operating system in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + Neither the name of Prentice Hall nor the names of the software + authors or contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS, AUTHORS, AND + CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL PRENTICE HALL OR ANY AUTHORS OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +The utility derived from MINIX sources is: + + ls + +******************************************************************************** +The utility 'nawk' and the library 'libuxre' are based on the Unix tools +made available by Caldera at <http://unixtools.sourceforge.net/>. +GNU GPL 2.0 applies to 'nawk' (see the file COPYING); GNU LGPL 2.1 applies +to 'libuxre' (see COPYING.LGPL). Changes to these tools are subject to these +licenses also. + +******************************************************************************** +The 'deroff' utility is derived from Plan 9 <http://cm.bell-labs.com/plan9dist/> +and is distributed under the terms of the Lucent Public License Version 1.02; +see the file LUCENT. + +******************************************************************************** +The CRC-32 function for cpio was derived from zlib 1.1.4: + + Copyright (C) 1995-2002 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + +******************************************************************************** +The inflate decompression code for the zip support has been derived from +Info-ZIP's zip 5.50: + + This is version 2002-Feb-16 of the Info-ZIP copyright and license. + The definitive version of this document should be available at + ftp://ftp.info-zip.org/pub/infozip/license.html indefinitely. + + + Copyright (c) 1990-2002 Info-ZIP. All rights reserved. + + For the purposes of this copyright and license, "Info-ZIP" is defined as + the following set of individuals: + + Mark Adler, John Bush, Karl Davis, Harald Denker, Jean-Michel + Dubois, Jean-loup Gailly, Hunter Goatley, Ian Gorman, Chris + Herborth, Dirk Haase, Greg Hartwig, Robert Heath, Jonathan Hudson, + Paul Kienitz, David Kirschbaum, Johnny Lee, Onno van der Linden, + Igor Mandrichenko, Steve P. Miller, Sergio Monesi, Keith Owens, + George Petrov, Greg Roelofs, Kai Uwe Rommel, Steve Salisbury, Dave + Smith, Christian Spieler, Antoine Verheijen, Paul von Behren, Rich + Wales, Mike White + + This software is provided "as is," without warranty of any kind, + express or implied. In no event shall Info-ZIP or its contributors + be held liable for any direct, indirect, incidental, special or + consequential damages arising out of the use of or inability to use + this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute + it freely, subject to the following restrictions: + + 1. Redistributions of source code must retain the above copyright + notice, definition, disclaimer, and this list of conditions. + + 2. Redistributions in binary form (compiled executables) must + reproduce the above copyright notice, definition, disclaimer, + and this list of conditions in documentation and/or other + materials provided with the distribution. 
The sole exception + to this condition is redistribution of a standard UnZipSFX + binary as part of a self-extracting archive; that is permitted + without inclusion of this license, as long as the normal + UnZipSFX banner has not been removed from the binary or + disabled. + + 3. Altered versions--including, but not limited to, ports to new + operating systems, existing ports with new graphical + interfaces, and dynamic, shared, or static library + versions--must be plainly marked as such and must not be + misrepresented as being the original source. Such altered + versions also must not be misrepresented as being Info-ZIP + releases--including, but not limited to, labeling of the + altered versions with the names "Info-ZIP" (or any variation + thereof, including, but not limited to, different + capitalizations), "Pocket UnZip," "WiZ" or "MacZip" without + the explicit permission of Info-ZIP. Such altered versions + are further prohibited from misrepresentative use of the + Zip-Bugs or Info-ZIP e-mail addresses or of the Info-ZIP + URL(s). + + 4. Info-ZIP retains the right to use the names "Info-ZIP," "Zip," + "UnZip," "UnZipSFX," "WiZ," "Pocket UnZip," "Pocket Zip," and + "MacZip" for its own source and binary releases. + +******************************************************************************** +The unshrink decompression code is derived from Info-ZIP's unzip 5.40: + + * Copyright (c) 1994 Greg Roelofs. + * Permission is granted to any individual/institution/corporate + * entity to use, copy, redistribute or modify this software for + * any purpose whatsoever, subject to the conditions noted in the + * Frequently Asked Questions section below, plus one additional + * condition: namely, that my name not be removed from the source + * code. (Other names may, of course, be added as modifications + * are made.) Corporate legal staff (like at IBM :-) ) who have + * problems understanding this can contact me through Zip-Bugs... + + + Q. 
Can I use the source code of Zip and UnZip in my commercial + application? + + A. Yes, so long as you include in your product an acknowledgment; a + pointer to the original, free compression sources; and a statement + making it clear that there are no extra or hidden charges resulting + from the use of our compression code in your product (see below for + an example). The acknowledgment should appear in at least one piece + of human-readable documentation (e.g., a README file or man page), + although additionally putting it in the executable(s) is OK, too. + In other words, you are allowed to sell only your own work, not ours, + and we'd like a little credit. (Note the additional restrictions + above on the code in unreduce.c, unshrink.c, vms.c, time_lib.c, and + everything in the wince and windll subdirectories.) Contact us at + Zip-Bugs@lists.wku.edu if you have special requirements. We also + like to hear when our code is being used, but we don't require that. + + <Product> incorporates compression code from the Info-ZIP group. + There are no extra charges or costs due to the use of this code, + and the original compression sources are freely available from + http://www.cdrom.com/pub/infozip/ or ftp://ftp.cdrom.com/pub/infozip/ + on the Internet. + + If you only need compression capability, not full zipfile support, + you might want to look at zlib instead; it has fewer restrictions + on commercial use. See http://www.cdrom.com/pub/infozip/zlib/ . + +******************************************************************************** +The blast decompression code (for DCL imploded zip archive entries) was +derived from code by Mark Adler distributed with zlib 1.2.1: + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. 
+ + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler@alumni.caltech.edu + +******************************************************************************** + +******************************************************************************** +The explode decompression code is derived from unzip 5.40; this version +of this code was put in the public domain by Mark Adler. + +******************************************************************************** + +Gunnar Ritter 2/3/07 diff --git a/LICENSE/LUCENT b/LICENSE/LUCENT @@ -0,0 +1,258 @@ +The Plan 9 software is provided under the terms of the +Lucent Public License, Version 1.02, reproduced below, +with the following exceptions: + +1. No right is granted to create derivative works of or + to redistribute (other than with the Plan 9 Operating System) + the screen imprinter fonts identified in subdirectory + /lib/font/bit/lucida and printer fonts (Lucida Sans Unicode, Lucida + Sans Italic, Lucida Sans Demibold, Lucida Typewriter, Lucida Sans + Typewriter83), identified in subdirectory /sys/lib/postscript/font. + These directories contain material copyrights by B&H Inc. and Y&Y Inc. + +2. The printer fonts identified in subdirectory /sys/lib/ghostscript/font + are subject to the GNU GPL, reproduced in the file /LICENSE.gpl. + +3. 
The ghostscript program in the subdirectory /sys/src/cmd/gs is + covered by the Aladdin Free Public License, reproduced in the file + /LICENSE.afpl. + +=================================================================== + +Lucent Public License Version 1.02 + +THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS PUBLIC +LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE +PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + +1. DEFINITIONS + +"Contribution" means: + + a. in the case of Lucent Technologies Inc. ("LUCENT"), the Original + Program, and + b. in the case of each Contributor, + + i. changes to the Program, and + ii. additions to the Program; + + where such changes and/or additions to the Program were added to the + Program by such Contributor itself or anyone acting on such + Contributor's behalf, and the Contributor explicitly consents, in + accordance with Section 3C, to characterization of the changes and/or + additions as Contributions. + +"Contributor" means LUCENT and any other entity that has Contributed a +Contribution to the Program. + +"Distributor" means a Recipient that distributes the Program, +modifications to the Program, or any part thereof. + +"Licensed Patents" mean patent claims licensable by a Contributor +which are necessarily infringed by the use or sale of its Contribution +alone or when combined with the Program. + +"Original Program" means the original version of the software +accompanying this Agreement as released by LUCENT, including source +code, object code and documentation, if any. + +"Program" means the Original Program and Contributions or any part +thereof + +"Recipient" means anyone who receives the Program under this +Agreement, including all Contributors. + +2. GRANT OF RIGHTS + + a. 
Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free copyright + license to reproduce, prepare derivative works of, publicly display, + publicly perform, distribute and sublicense the Contribution of such + Contributor, if any, and such derivative works, in source code and + object code form. + + b. Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free patent + license under Licensed Patents to make, use, sell, offer to sell, + import and otherwise transfer the Contribution of such Contributor, if + any, in source code and object code form. The patent license granted + by a Contributor shall also apply to the combination of the + Contribution of that Contributor and the Program if, at the time the + Contribution is added by the Contributor, such addition of the + Contribution causes such combination to be covered by the Licensed + Patents. The patent license granted by a Contributor shall not apply + to (i) any other combinations which include the Contribution, nor to + (ii) Contributions of other Contributors. No hardware per se is + licensed hereunder. + + c. Recipient understands that although each Contributor grants the + licenses to its Contributions set forth herein, no assurances are + provided by any Contributor that the Program does not infringe the + patent or other intellectual property rights of any other entity. Each + Contributor disclaims any liability to Recipient for claims brought by + any other entity based on infringement of intellectual property rights + or otherwise. As a condition to exercising the rights and licenses + granted hereunder, each Recipient hereby assumes sole responsibility + to secure any other intellectual property rights needed, if any. 
For + example, if a third party patent license is required to allow + Recipient to distribute the Program, it is Recipient's responsibility + to acquire that license before distributing the Program. + + d. Each Contributor represents that to its knowledge it has sufficient + copyright rights in its Contribution, if any, to grant the copyright + license set forth in this Agreement. + +3. REQUIREMENTS + +A. Distributor may choose to distribute the Program in any form under +this Agreement or under its own license agreement, provided that: + + a. it complies with the terms and conditions of this Agreement; + + b. if the Program is distributed in source code or other tangible + form, a copy of this Agreement or Distributor's own license agreement + is included with each copy of the Program; and + + c. if distributed under Distributor's own license agreement, such + license agreement: + + i. effectively disclaims on behalf of all Contributors all warranties + and conditions, express and implied, including warranties or + conditions of title and non-infringement, and implied warranties or + conditions of merchantability and fitness for a particular purpose; + ii. effectively excludes on behalf of all Contributors all liability + for damages, including direct, indirect, special, incidental and + consequential damages, such as lost profits; and + iii. states that any provisions which differ from this Agreement are + offered by that Contributor alone and not by any other party. + +B. Each Distributor must include the following in a conspicuous + location in the Program: + + Copyright (C) 2003, Lucent Technologies Inc. and others. All Rights + Reserved. + +C. In addition, each Contributor must identify itself as the +originator of its Contribution in a manner that reasonably allows +subsequent Recipients to identify the originator of the Contribution. +Also, each Contributor must agree that the additions and/or changes +are intended to be a Contribution. 
Once a Contribution is contributed, +it may not thereafter be revoked. + +4. COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain +responsibilities with respect to end users, business partners and the +like. While this license is intended to facilitate the commercial use +of the Program, the Distributor who includes the Program in a +commercial product offering should do so in a manner which does not +create potential liability for Contributors. Therefore, if a +Distributor includes the Program in a commercial product offering, +such Distributor ("Commercial Distributor") hereby agrees to defend +and indemnify every Contributor ("Indemnified Contributor") against +any losses, damages and costs (collectively "Losses") arising from +claims, lawsuits and other legal actions brought by a third party +against the Indemnified Contributor to the extent caused by the acts +or omissions of such Commercial Distributor in connection with its +distribution of the Program in a commercial product offering. The +obligations in this section do not apply to any claims or Losses +relating to any actual or alleged intellectual property infringement. +In order to qualify, an Indemnified Contributor must: a) promptly +notify the Commercial Distributor in writing of such claim, and b) +allow the Commercial Distributor to control, and cooperate with the +Commercial Distributor in, the defense and any related settlement +negotiations. The Indemnified Contributor may participate in any such +claim at its own expense. + +For example, a Distributor might include the Program in a commercial +product offering, Product X. That Distributor is then a Commercial +Distributor. If that Commercial Distributor then makes performance +claims, or offers warranties related to Product X, those performance +claims and warranties are such Commercial Distributor's responsibility +alone. 
Under this section, the Commercial Distributor would have to +defend claims against the Contributors related to those performance +claims and warranties, and if a court requires any Contributor to pay +any damages as a result, the Commercial Distributor must pay those +damages. + +5. NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS +PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY +WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY +OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely +responsible for determining the appropriateness of using and +distributing the Program and assumes all risks associated with its +exercise of rights under this Agreement, including but not limited to +the risks and costs of program errors, compliance with applicable +laws, damage to or loss of data, programs or equipment, and +unavailability or interruption of operations. + +6. DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR +ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING +WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR +DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED +HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +7. EXPORT CONTROL + +Recipient agrees that Recipient alone is responsible for compliance +with the United States export administration regulations (and the +export control laws and regulation of any other countries). + +8. 
GENERAL + +If any provision of this Agreement is invalid or unenforceable under +applicable law, it shall not affect the validity or enforceability of +the remainder of the terms of this Agreement, and without further +action by the parties hereto, such provision shall be reformed to the +minimum extent necessary to make such provision valid and enforceable. + +If Recipient institutes patent litigation against a Contributor with +respect to a patent applicable to software (including a cross-claim or +counterclaim in a lawsuit), then any patent licenses granted by that +Contributor to such Recipient under this Agreement shall terminate as +of the date such litigation is filed. In addition, if Recipient +institutes patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Program +itself (excluding combinations of the Program with other software or +hardware) infringes such Recipient's patent(s), then such Recipient's +rights granted under Section 2(b) shall terminate as of the date such +litigation is filed. + +All Recipient's rights under this Agreement shall terminate if it +fails to comply with any of the material terms or conditions of this +Agreement and does not cure such failure in a reasonable period of +time after becoming aware of such noncompliance. If all Recipient's +rights under this Agreement terminate, Recipient agrees to cease use +and distribution of the Program as soon as reasonably practicable. +However, Recipient's obligations under this Agreement and any licenses +granted by Recipient relating to the Program shall continue and +survive. + +LUCENT may publish new versions (including revisions) of this +Agreement from time to time. Each new version of the Agreement will be +given a distinguishing version number. The Program (including +Contributions) may always be distributed subject to the version of the +Agreement under which it was received. 
In addition, after a new +version of the Agreement is published, Contributor may elect to +distribute the Program (including its Contributions) under the new +version. No one other than LUCENT has the right to modify this +Agreement. Except as expressly stated in Sections 2(a) and 2(b) above, +Recipient receives no rights or licenses to the intellectual property +of any Contributor under this Agreement, whether expressly, by +implication, estoppel or otherwise. All rights in the Program not +expressly granted under this Agreement are reserved. + +This Agreement is governed by the laws of the State of New York and +the intellectual property laws of the United States of America. No +party to this Agreement will bring a legal action under this Agreement +more than one year after the cause of action arose. Each party waives +its rights to a jury trial in any resulting litigation. + diff --git a/LICENSE/OPENSOLARIS.LICENSE b/LICENSE/OPENSOLARIS.LICENSE @@ -0,0 +1,385 @@ +Unless otherwise noted, all files in this distribution are released +under the Common Development and Distribution License (CDDL), +Version 1.0 only. Exceptions are noted within the associated +source files. + +-------------------------------------------------------------------- + + +COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0 + +1. Definitions. + + 1.1. "Contributor" means each individual or entity that creates + or contributes to the creation of Modifications. + + 1.2. "Contributor Version" means the combination of the Original + Software, prior Modifications used by a Contributor (if any), + and the Modifications made by that particular Contributor. + + 1.3. "Covered Software" means (a) the Original Software, or (b) + Modifications, or (c) the combination of files containing + Original Software with files containing Modifications, in + each case including portions thereof. + + 1.4. "Executable" means the Covered Software in any form other + than Source Code. + + 1.5. 
"Initial Developer" means the individual or entity that first + makes Original Software available under this License. + + 1.6. "Larger Work" means a work which combines Covered Software or + portions thereof with code not governed by the terms of this + License. + + 1.7. "License" means this document. + + 1.8. "Licensable" means having the right to grant, to the maximum + extent possible, whether at the time of the initial grant or + subsequently acquired, any and all of the rights conveyed + herein. + + 1.9. "Modifications" means the Source Code and Executable form of + any of the following: + + A. Any file that results from an addition to, deletion from or + modification of the contents of a file containing Original + Software or previous Modifications; + + B. Any new file that contains any part of the Original + Software or previous Modifications; or + + C. Any new file that is contributed or otherwise made + available under the terms of this License. + + 1.10. "Original Software" means the Source Code and Executable + form of computer software code that is originally released + under this License. + + 1.11. "Patent Claims" means any patent claim(s), now owned or + hereafter acquired, including without limitation, method, + process, and apparatus claims, in any patent Licensable by + grantor. + + 1.12. "Source Code" means (a) the common form of computer software + code in which modifications are made and (b) associated + documentation included in or with such code. + + 1.13. "You" (or "Your") means an individual or a legal entity + exercising rights under, and complying with all of the terms + of, this License. For legal entities, "You" includes any + entity which controls, is controlled by, or is under common + control with You. 
For purposes of this definition, + "control" means (a) the power, direct or indirect, to cause + the direction or management of such entity, whether by + contract or otherwise, or (b) ownership of more than fifty + percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants. + + 2.1. The Initial Developer Grant. + + Conditioned upon Your compliance with Section 3.1 below and + subject to third party intellectual property claims, the Initial + Developer hereby grants You a world-wide, royalty-free, + non-exclusive license: + + (a) under intellectual property rights (other than patent or + trademark) Licensable by Initial Developer, to use, + reproduce, modify, display, perform, sublicense and + distribute the Original Software (or portions thereof), + with or without Modifications, and/or as part of a Larger + Work; and + + (b) under Patent Claims infringed by the making, using or + selling of Original Software, to make, have made, use, + practice, sell, and offer for sale, and/or otherwise + dispose of the Original Software (or portions thereof). + + (c) The licenses granted in Sections 2.1(a) and (b) are + effective on the date Initial Developer first distributes + or otherwise makes the Original Software available to a + third party under the terms of this License. + + (d) Notwithstanding Section 2.1(b) above, no patent license is + granted: (1) for code that You delete from the Original + Software, or (2) for infringements caused by: (i) the + modification of the Original Software, or (ii) the + combination of the Original Software with other software + or devices. + + 2.2. Contributor Grant. 
+ + Conditioned upon Your compliance with Section 3.1 below and + subject to third party intellectual property claims, each + Contributor hereby grants You a world-wide, royalty-free, + non-exclusive license: + + (a) under intellectual property rights (other than patent or + trademark) Licensable by Contributor to use, reproduce, + modify, display, perform, sublicense and distribute the + Modifications created by such Contributor (or portions + thereof), either on an unmodified basis, with other + Modifications, as Covered Software and/or as part of a + Larger Work; and + + (b) under Patent Claims infringed by the making, using, or + selling of Modifications made by that Contributor either + alone and/or in combination with its Contributor Version + (or portions of such combination), to make, use, sell, + offer for sale, have made, and/or otherwise dispose of: + (1) Modifications made by that Contributor (or portions + thereof); and (2) the combination of Modifications made by + that Contributor with its Contributor Version (or portions + of such combination). + + (c) The licenses granted in Sections 2.2(a) and 2.2(b) are + effective on the date Contributor first distributes or + otherwise makes the Modifications available to a third + party. + + (d) Notwithstanding Section 2.2(b) above, no patent license is + granted: (1) for any code that Contributor has deleted + from the Contributor Version; (2) for infringements caused + by: (i) third party modifications of Contributor Version, + or (ii) the combination of Modifications made by that + Contributor with other software (except as part of the + Contributor Version) or other devices; or (3) under Patent + Claims infringed by Covered Software in the absence of + Modifications made by that Contributor. + +3. Distribution Obligations. + + 3.1. Availability of Source Code. 
+ + Any Covered Software that You distribute or otherwise make + available in Executable form must also be made available in Source + Code form and that Source Code form must be distributed only under + the terms of this License. You must include a copy of this + License with every copy of the Source Code form of the Covered + Software You distribute or otherwise make available. You must + inform recipients of any such Covered Software in Executable form + as to how they can obtain such Covered Software in Source Code + form in a reasonable manner on or through a medium customarily + used for software exchange. + + 3.2. Modifications. + + The Modifications that You create or to which You contribute are + governed by the terms of this License. You represent that You + believe Your Modifications are Your original creation(s) and/or + You have sufficient rights to grant the rights conveyed by this + License. + + 3.3. Required Notices. + + You must include a notice in each of Your Modifications that + identifies You as the Contributor of the Modification. You may + not remove or alter any copyright, patent or trademark notices + contained within the Covered Software, or any notices of licensing + or any descriptive text giving attribution to any Contributor or + the Initial Developer. + + 3.4. Application of Additional Terms. + + You may not offer or impose any terms on any Covered Software in + Source Code form that alters or restricts the applicable version + of this License or the recipients' rights hereunder. You may + choose to offer, and to charge a fee for, warranty, support, + indemnity or liability obligations to one or more recipients of + Covered Software. However, you may do so only on Your own behalf, + and not on behalf of the Initial Developer or any Contributor. 
+ You must make it absolutely clear that any such warranty, support, + indemnity or liability obligation is offered by You alone, and You + hereby agree to indemnify the Initial Developer and every + Contributor for any liability incurred by the Initial Developer or + such Contributor as a result of warranty, support, indemnity or + liability terms You offer. + + 3.5. Distribution of Executable Versions. + + You may distribute the Executable form of the Covered Software + under the terms of this License or under the terms of a license of + Your choice, which may contain terms different from this License, + provided that You are in compliance with the terms of this License + and that the license for the Executable form does not attempt to + limit or alter the recipient's rights in the Source Code form from + the rights set forth in this License. If You distribute the + Covered Software in Executable form under a different license, You + must make it absolutely clear that any terms which differ from + this License are offered by You alone, not by the Initial + Developer or Contributor. You hereby agree to indemnify the + Initial Developer and every Contributor for any liability incurred + by the Initial Developer or such Contributor as a result of any + such terms You offer. + + 3.6. Larger Works. + + You may create a Larger Work by combining Covered Software with + other code not governed by the terms of this License and + distribute the Larger Work as a single product. In such a case, + You must make sure the requirements of this License are fulfilled + for the Covered Software. + +4. Versions of the License. + + 4.1. New Versions. + + Sun Microsystems, Inc. is the initial license steward and may + publish revised and/or new versions of this License from time to + time. Each version will be given a distinguishing version number. + Except as provided in Section 4.3, no one other than the license + steward has the right to modify this License. + + 4.2. 
Effect of New Versions. + + You may always continue to use, distribute or otherwise make the + Covered Software available under the terms of the version of the + License under which You originally received the Covered Software. + If the Initial Developer includes a notice in the Original + Software prohibiting it from being distributed or otherwise made + available under any subsequent version of the License, You must + distribute and make the Covered Software available under the terms + of the version of the License under which You originally received + the Covered Software. Otherwise, You may also choose to use, + distribute or otherwise make the Covered Software available under + the terms of any subsequent version of the License published by + the license steward. + + 4.3. Modified Versions. + + When You are an Initial Developer and You want to create a new + license for Your Original Software, You may create and use a + modified version of this License if You: (a) rename the license + and remove any references to the name of the license steward + (except to note that the license differs from this License); and + (b) otherwise make it clear that the license contains terms which + differ from this License. + +5. DISCLAIMER OF WARRANTY. + + COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" + BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, + INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED + SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR + PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND + PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY + COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE + INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY + NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF + WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF + ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS + DISCLAIMER. + +6. 
TERMINATION. + + 6.1. This License and the rights granted hereunder will terminate + automatically if You fail to comply with terms herein and fail to + cure such breach within 30 days of becoming aware of the breach. + Provisions which, by their nature, must remain in effect beyond + the termination of this License shall survive. + + 6.2. If You assert a patent infringement claim (excluding + declaratory judgment actions) against Initial Developer or a + Contributor (the Initial Developer or Contributor against whom You + assert such claim is referred to as "Participant") alleging that + the Participant Software (meaning the Contributor Version where + the Participant is a Contributor or the Original Software where + the Participant is the Initial Developer) directly or indirectly + infringes any patent, then any and all rights granted directly or + indirectly to You by such Participant, the Initial Developer (if + the Initial Developer is not the Participant) and all Contributors + under Sections 2.1 and/or 2.2 of this License shall, upon 60 days + notice from Participant terminate prospectively and automatically + at the expiration of such 60 day notice period, unless if within + such 60 day period You withdraw Your claim with respect to the + Participant Software against such Participant either unilaterally + or pursuant to a written agreement with Participant. + + 6.3. In the event of termination under Sections 6.1 or 6.2 above, + all end user licenses that have been validly granted by You or any + distributor hereunder prior to termination (excluding licenses + granted to You by any distributor) shall survive termination. + +7. LIMITATION OF LIABILITY. 
+ + UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT + (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE + INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF + COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE + LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR + CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT + LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK + STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER + COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN + INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF + LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL + INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT + APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO + NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR + CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT + APPLY TO YOU. + +8. U.S. GOVERNMENT END USERS. + + The Covered Software is a "commercial item," as that term is + defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial + computer software" (as that term is defined at 48 + C.F.R. 252.227-7014(a)(1)) and "commercial computer software + documentation" as such terms are used in 48 C.F.R. 12.212 + (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 + C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all + U.S. Government End Users acquire Covered Software with only those + rights set forth herein. This U.S. Government Rights clause is in + lieu of, and supersedes, any other FAR, DFAR, or other clause or + provision that addresses Government rights in computer software + under this License. + +9. MISCELLANEOUS. + + This License represents the complete agreement concerning subject + matter hereof. If any provision of this License is held to be + unenforceable, such provision shall be reformed only to the extent + necessary to make it enforceable. 
This License shall be governed + by the law of the jurisdiction specified in a notice contained + within the Original Software (except to the extent applicable law, + if any, provides otherwise), excluding such jurisdiction's + conflict-of-law provisions. Any litigation relating to this + License shall be subject to the jurisdiction of the courts located + in the jurisdiction and venue specified in a notice contained + within the Original Software, with the losing party responsible + for costs, including, without limitation, court costs and + reasonable attorneys' fees and expenses. The application of the + United Nations Convention on Contracts for the International Sale + of Goods is expressly excluded. Any law or regulation which + provides that the language of a contract shall be construed + against the drafter shall not apply to this License. You agree + that You alone are responsible for compliance with the United + States export administration regulations (and the export control + laws and regulation of any other countries) when You use, + distribute or otherwise make available any Covered Software. + +10. RESPONSIBILITY FOR CLAIMS. + + As between Initial Developer and the Contributors, each party is + responsible for claims and damages arising, directly or + indirectly, out of its utilization of rights under this License + and You agree to work with Initial Developer and Contributors to + distribute such responsibility on an equitable basis. Nothing + herein is intended or shall be deemed to constitute any admission + of liability. + +-------------------------------------------------------------------- + +NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND +DISTRIBUTION LICENSE (CDDL) + +For Covered Software in this distribution, this License shall +be governed by the laws of the State of California (excluding +conflict-of-law provisions). 
+ +Any litigation relating to this License shall be subject to the +jurisdiction of the Federal Courts of the Northern District of +California and the state courts of the State of California, with +venue lying in Santa Clara County, California. diff --git a/LICENSE/README b/LICENSE/README @@ -0,0 +1,27 @@ +README for license conditions of the Heirloom Toolchest +======================================================= + +The Heirloom Toolchest is derived from a variety of sources; the +respective licensing terms can be found in the other files in this +directory; in addition, each source file contains the license terms +of the original author at its top. + +All newly written code is put under a zlib-style license (except for +additions to the GPL and LGPL code in awk and libuxre). The rationale +is that for something distributed as widely as Unix code, any license +that requires more than naming the author would only cause annoyance. + +In effect, this means that commercial Unix vendors who already have a +Unix source code license can use nearly all of this code without being +forced to mention it in other places than the source code files. + +However, if you work for such a vendor, don't do so. Instead, convince +the management to release at least the utility source. There is really +nothing to keep secret about it to have an advantage over competitors, +as any person or company can simply use the source of this or another +toolchest to have comparable functionality. So by releasing the source +to your version, you lose nothing, but you will make your users happy +since they can use it as a reference. And happy users also mean more +money in the end. + +Gunnar Ritter 9/22/03 diff --git a/README b/README @@ -0,0 +1,5 @@ +hbase is a collection of programs that complements sbase and ubase. It's meant +to be a temporary project that will shrink and die once sbase and ubase get +implementations of most of the programs included. 
hbase mostly contains +programs taken from the Heirloom project, but also has other programs, such as +patch taken from FreeBSD/OpenBSD and mk taken from plan9port. diff --git a/_install/install.1b b/_install/install.1b @@ -0,0 +1,113 @@ +.\" +.\" Copyright (c) 2003 Gunnar Ritter +.\" +.\" This software is provided 'as-is', without any express or implied +.\" warranty. In no event will the authors be held liable for any damages +.\" arising from the use of this software. +.\" +.\" Permission is granted to anyone to use this software for any purpose, +.\" including commercial applications, and to alter it and redistribute +.\" it freely, subject to the following restrictions: +.\" +.\" 1. The origin of this software must not be misrepresented; you must not +.\" claim that you wrote the original software. If you use this software +.\" in a product, an acknowledgment in the product documentation would be +.\" appreciated but is not required. +.\" +.\" 2. Altered source versions must be plainly marked as such, and must not be +.\" misrepresented as being the original software. +.\" +.\" 3. This notice may not be removed or altered from any source distribution. +.\" Sccsid @(#)install.1b 1.3 (gritter) 4/17/03 +.TH INSTALL 1B "4/17/03" "Heirloom Toolchest" "BSD System Compatibility" +.SH NAME +install \- (BSD) install files +.SH SYNOPSIS +.HP +.ad l +.nh +\fB/usr/ucb/install\fR [\fB\-cs\fR] [\fB-g\fI\ group\fR] [\fB\-m\fI\ mode\fR] +[\fB-o\fI\ owner\fR] +\fIfile1 file2\fR +.HP +.ad l +.nh +\fB/usr/ucb/install\fR [\fB\-cs\fR] [\fB-g\fI\ group\fR] [\fB\-m\fI\ mode\fR] +[\fB-o\fI\ owner\fR] +\fIfile\fR\ .\ .\ .\ \fIdirectory\fR +.HP +.ad l +.nh +\fB/usr/ucb/install\fR \fB\-d\fR [\fB\-g\fI\ group\fR] [\fB\-m\fI\ mode\fR] +[\fB\-o\fI\ owner\fR] \fIdirectory\fR +.br +.ad b +.hy 1 +.SH DESCRIPTION +The +.I install +command copies one regular file to a destination file +or one or more regular files into a destination directory. 
+It is commonly used within Makefiles +to install newly created software components. +.PP +If the +.B \-d +option is present, +.I install +creates the named +.IR directory , +also creating non-existent parent directories. +It is not an error if the directory already exists. +The +.BR \-g , +.BR \-m , +and +.B \-o +options +apply to the last pathname component only; +attributes are set whether the directory is newly created or not. +Parent directories are always created using a default mode of 777 +minus umask +and default ownerships. +.PP +The following options are also accepted: +.TP 10 +.B \-c +This option is ignored and exists for compatibility only. +Ancient versions of this command removed the source file +unless this option was present. +.TP 10 +.B \-s +Strip the target files +(i.\|e. execute the +.IR strip (1) +command on them). +.TP 10 +\fB\-g\fI group\fR +Use the given +.I group +ownership for target files. +By default, +the group of the invoking user is used. +.TP 10 +\fB\-m\fI mode\fR +Set the access permissions of target files to octal +.IR mode . +By default, +mode 755 is used. +.TP 10 +\fB\-o\fI owner\fR +Specifies the +.I owner +of target files. +By default, +target files are owned by the invoking user. +.SH "SEE ALSO" +cp(1), +chgrp(1), +chmod(1), +chown(1), +make(1), +mkdir(1), +strip(1) diff --git a/_install/install.c b/_install/install.c @@ -0,0 +1,436 @@ +/* + * install - (BSD style) install files + * + * Gunnar Ritter, Freiburg i. Br., Germany, March 2003. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)/usr/ucb/install.sl 1.12 (gritter) 5/29/05"; + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <libgen.h> +#include <limits.h> +#include <pwd.h> +#include <grp.h> + +enum okay { + OKAY = 0, + STOP = 1 +}; + +static int mflag; /* -m option present */ +static int sflag; /* strip files */ +static mode_t mode = 0755; /* mode to set */ +static int dflag; /* create directories */ +static int gflag; /* set group */ +static gid_t group; /* group to set */ +static int oflag; /* set owner */ +static uid_t owner; /* owner to set */ +static int errcnt; /* count of errors */ +static char *progname; /* argv[0] to main */ + +void * +srealloc(void *op, size_t size) +{ + void *np; + + if ((np = realloc(op, size)) == NULL) { + write(2, "no memory\n", 10); + _exit(077); + } + return np; +} + +void * +smalloc(size_t size) +{ + return srealloc(NULL, size); +} + +uid_t +getowner(const char *string) +{ + struct passwd *pwd; + char *x; + long val; + + if ((pwd = getpwnam(string)) != NULL) + return pwd->pw_uid; + val = strtoul(string, &x, 10); + if (*x != '\0' || *string == '+' || *string == '-') { + fprintf(stderr, "%s: unknown user %s.\n", progname, string); + 
exit(1); + } + return val; +} + +gid_t +getgroup(const char *string) +{ + struct group *grp; + char *x; + long val; + + if ((grp = getgrnam(string)) != NULL) + return grp->gr_gid; + val = strtoul(string, &x, 10); + if (*x != '\0' || *string == '+' || *string == '-') { + fprintf(stderr, "%s: unknown group %s.\n", progname, string); + exit(1); + } + return val; +} + +void +getpath(const char *path, char **file, char **filend, size_t *sz, size_t *slen) +{ + *sz = 14 + strlen(path) + 2; + *file = smalloc(*sz); + *filend = *file; + if (path[0] == '/' && path[1] == '\0') + *(*filend)++ = '/'; + else { + const char *cp = path; + while ((*(*filend)++ = *cp++) != '\0'); + (*filend)[-1] = '/'; + } + *slen = *filend - *file; +} + +void +setpath(const char *base, char **file, char **filend, + size_t slen, size_t *sz, size_t *ss) +{ + if (slen + (*ss = strlen(base)) >= *sz) { + *sz += slen + *ss + 15; + *file = srealloc(*file, *sz); + *filend = &(*file)[slen]; + } + strcpy(*filend, base); +} + +void +fdcopy(const char *src, const struct stat *ssp, const int sfd, + const char *tgt, const struct stat *dsp, const int dfd) +{ + char *buf; + size_t bufsize; + ssize_t rsz, wo, wt; + + if ((bufsize = ssp->st_blksize) < dsp->st_blksize) + if ((bufsize = dsp->st_blksize) <= 0) + bufsize = 512; + buf = smalloc(bufsize); + while ((rsz = read(sfd, buf, bufsize)) > 0) { + wt = 0; + do { + if ((wo = write(dfd, buf + wt, rsz - wt)) < 0) { + fprintf(stderr, "%s: write: %s: %s\n", + progname, tgt, + strerror(errno)); + errcnt |= 01; + unlink(tgt); + free(buf); + return; + } + wt += wo; + } while (wt < rsz); + } + if (rsz < 0) { + fprintf(stderr, "%s: read: %s: %s\n", + progname, src, strerror(errno)); + errcnt |= 01; + unlink(tgt); + } + free(buf); +} + +static void +usage(void) +{ + fprintf(stderr, "\ +usage: %s [-cs] [-g group] [-m mode] [-o owner] file ... 
destination\n\ + %s -d [-g group] [-m mode] [-o owner] dir\n", + progname, progname); + exit(2); +} + +static void +strip(const char *file) +{ + const char cpr[] = "strip "; + char *cmd, *cp; + const char *sp; + + cp = cmd = smalloc(strlen(cpr) + strlen(file) + 1); + for (sp = cpr; *sp; sp++) + *cp++ = *sp; + for (sp = file; *sp; sp++) + *cp++ = *sp; + *cp = '\0'; + system(cmd); + free(cmd); +} + +static enum okay +chgown(const char *fn, struct stat *sp) +{ + struct stat st; + + if (sp == NULL) { + if (stat(fn, &st) < 0) { + fprintf(stderr, "%s: stat: %s: %s\n", + progname, fn, strerror(errno)); + errcnt |= 1; + return STOP; + } + sp = &st; + } + if (!oflag) + owner = sp->st_uid; + if (!gflag) + group = sp->st_gid; + if (chown(fn, owner, group) < 0) { + fprintf(stderr, "%s: chown: %s: %s\n", progname, fn, + strerror(errno)); + errcnt |= 01; + return STOP; + } + return OKAY; +} + +static enum okay +check(const char *src, const char *tgt, const struct stat *dsp, + struct stat *ssp) +{ + if (stat(src, ssp) < 0) { + fprintf(stderr, "%s: %s: %s\n", progname, src, + strerror(errno)); + errcnt |= 01; + return STOP; + } + if ((ssp->st_mode&S_IFMT) != S_IFREG && strcmp(src, "/dev/null")) { + fprintf(stderr, "%s: %s isn't a regular file.\n", + progname, src); + errcnt |= 01; + return STOP; + } + if (dsp && (ssp->st_dev == dsp->st_dev && ssp->st_ino == dsp->st_ino)) { + fprintf(stderr, "%s: %s and %s are the same file.\n", + progname, src, tgt); + errcnt |= 01; + return STOP; + } + return OKAY; +} + +static void +cp(const char *src, const char *tgt, struct stat *dsp) +{ + struct stat sst, nst; + int sfd, dfd; + + if (check(src, tgt, dsp, &sst) != OKAY) + return; + unlink(tgt); + if ((dfd = creat(tgt, 0700)) < 0 || fchmod(dfd, 0700) < 0 || + fstat(dfd, &nst) < 0) { + fprintf(stderr, "%s: %s: %s\n", progname, src, + strerror(errno)); + errcnt |= 01; + if (dfd >= 0) + close(dfd); + return; + } + if ((sfd = open(src, O_RDONLY)) < 0) { + fprintf(stderr, "%s: open: %s: %s\n", 
progname, src, + strerror(errno)); + errcnt |= 01; + return; + } + fdcopy(src, &sst, sfd, tgt, &nst, dfd); + close(dfd); + close(sfd); + if (sflag) + strip(tgt); + if (oflag || gflag) + chgown(tgt, &nst); + if (chmod(tgt, mode) < 0) { + fprintf(stderr, "%s: %s: %s\n", progname, tgt, strerror(errno)); + errcnt |= 01; + } +} + +static void +installf(int ac, char **av) +{ + struct stat dst, ust; + + if (lstat(av[ac-1], &dst) == 0) { + if ((dst.st_mode&S_IFMT) != S_IFLNK || + stat(av[ac-1], &ust) < 0) + ust = dst; + if ((ust.st_mode&S_IFMT) == S_IFDIR) { + char *copy, *cend; + size_t sz, slen, ss; + int i; + + getpath(av[ac-1], &copy, &cend, &sz, &slen); + for (i = 0; i < ac-1; i++) { + setpath(basename(av[i]), &copy, &cend, + slen, &sz, &ss); + cp(av[i], copy, stat(copy, &dst) < 0 ? + NULL : &dst); + } + } else if (ac > 2) + usage(); + else + cp(av[0], av[1], &ust); + } else if (ac > 2) + usage(); + else + cp(av[0], av[1], NULL); +} + +static enum okay +makedir(const char *dir) +{ + struct stat st; + + if (mkdir(dir, 0777) < 0) { + if (errno == EEXIST) { + if (stat(dir, &st) < 0 || + (st.st_mode&S_IFMT) != S_IFDIR){ + fprintf(stderr, "%s: %s is not a directory\n", + progname, dir); + errcnt |= 01; + return STOP; + } + } else { + fprintf(stderr, "%s: mkdir: %s: %s\n", + progname, dir, strerror(errno)); + errcnt |= 01; + return STOP; + } + } + return OKAY; +} +static void +installd(char *dir) +{ + struct stat st; + int sgid_bit; + char *slash; + char c; + + slash = dir; + do { + while (*slash == '/') + slash++; + while (*slash != '/' && *slash != '\0') + slash++; + c = *slash; + *slash = '\0'; + if (makedir(dir) != OKAY) + return; + if (c == '\0') { + if (oflag || gflag) + if (chgown(dir, NULL) != OKAY) + return; + if (mflag) { + sgid_bit = stat(dir, &st) == 0 && + st.st_mode&S_ISGID ? 
S_ISGID : 0; + if (chmod(dir, mode | sgid_bit) < 0) { + fprintf(stderr, "%s: chmod: %s: %s\n", + progname, dir, + strerror(errno)); + errcnt |= 01; + return; + } + } + } + *slash = c; + } while (c != '\0'); +} + +int +main(int argc, char **argv) +{ + const char optstring[] = "csg:m:o:d"; + int i; + + progname = basename(argv[0]); + while ((i = getopt(argc, argv, optstring)) != EOF) { + switch (i) { + case 'c': + /* no-op */ + break; + case 's': + sflag = 1; + break; + case 'g': + gflag = 1; + group = getgroup(optarg); + break; + case 'm': + mflag = 1; + mode = strtol(optarg, NULL, 8); + break; + case 'o': + oflag = 1; + owner = getowner(optarg); + break; + case 'd': + dflag = 1; + break; + default: + usage(); + } + } + if (dflag) { + if (argc == optind || argc > optind + 1) + usage(); + if (mflag) + mode &= ~(mode_t)S_ISGID; + installd(argv[optind]); + } else { + if (argc < optind + 2) + usage(); + installf(argc - optind, &argv[optind]); + } + return errcnt; +} diff --git a/_install/mkfile b/_install/mkfile @@ -0,0 +1,12 @@ +BIN = _install +TARG = __install +OBJ = install.o +INSTALL_BIN = install +INSTALL_MAN1b = install.1b +CLEAN_FILES = install + +<$mkbuild/mk.default + +__install:QV: $BIN + mv _install install + diff --git a/bc/bc.1 b/bc/bc.1 @@ -0,0 +1,222 @@ +.\" +.\" Sccsid @(#)bc.1 1.7 (gritter) 10/11/03 +.\" Derived from bc(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH BC 1 "10/11/03" "Heirloom Toolchest" "User Commands" +.SH NAME +bc \- arbitrary-precision arithmetic language +.SH SYNOPSIS +\fBbc\fR [\fB\-c\fR] [\fB\-l\fR] [\fIfile\fR ... ] +.SH DESCRIPTION +.I Bc +is an interactive processor for a language which resembles +C but provides unlimited precision arithmetic. +It takes input from any files given, then reads +the standard input. +The +.B \-l +argument stands for the name +of an arbitrary precision math library. +The syntax for +.I bc +programs is as follows; +L means letter a-z, +E means expression, S means statement. +.HP 6 +Comments +.br +are enclosed in /* and */. 
+.HP 6 +Names +.br +simple variables: L +.br +array elements: L [ E ] +.br +The words `ibase', `obase', and `scale' +.HP 6 +Other operands +.br +arbitrarily long numbers with optional sign and decimal point. +.br +( E ) +.br +sqrt ( E ) +.br +length ( E ) number of significant decimal digits +.br +scale ( E ) number of digits right of decimal point +.br +L ( E , ... , E ) +.HP 6 +Operators +.br ++ \- * / % ^ +(% is remainder; ^ is power) +.br +++ \-\- (prefix and postfix; apply to names) +.br +== <= >= != < > +.br += =+ =\- =* =/ =% =^ +.br +.HP 6 +Statements +.br +E +.br +{ S ; ... ; S } +.br +if ( E ) S +.br +while ( E ) S +.br +for ( E ; E ; E ) S +.br +null statement +.br +break +.br +quit +.HP 6 +Function definitions +.br +define L ( L ,..., L ) { +.br + auto L, ... , L +.br + S; ... S +.br + return ( E ) +.br +} +.HP 6 +Functions in +.B \-l +math library +.br +s(x) sine +.br +c(x) cosine +.br +e(x) exponential +.br +l(x) log +.br +a(x) arctangent +.br +j(n,x) Bessel function +.PP +.DT +All function arguments are passed by value. +.PP +The value of a statement that is an expression is printed +unless the main operator is an assignment. +Either semicolons or newlines may separate statements. +Assignment to +.I scale +influences the number of digits to be retained on arithmetic +operations in the manner of +.IR dc (1). +Assignments to +.I ibase +or +.I obase +set the input and output number radix respectively. +.PP +The same letter may be used as an array, a function, +and a simple variable simultaneously. +All variables are global to the program. +`Auto' variables are pushed down during function calls. +When using arrays as function arguments +or defining them as automatic variables +empty square brackets must follow the array name. 
+.PP +For example +.PP +.nf +scale = 20 +define e(x){ + auto a, b, c, i, s + a = 1 + b = 1 + s = 1 + for(i=1; 1==1; i++){ + a = a*x + b = b*i + c = a/b + if(c == 0) return(s) + s = s+c + } +} +.PP +.fi +defines a function to compute an approximate value of +the exponential function and +.PP +.nf + for(i=1; i<=10; i++) e(i) +.fi +.PP +prints approximate values of the exponential function of +the first ten integers. +.PP +.I Bc +is actually a preprocessor for +.IR dc (1), +which it invokes automatically, unless the +.B \-c +(compile only) +option is present. +In this case the +.I dc +input is sent to the standard output instead. +.SH FILES +.ta \w'/usr/5lib/lib.b 'u +/usr/5lib/lib.b mathematical library +.br +dc(1) desk calculator proper +.SH "SEE ALSO" +dc(1) +.br +L. L. Cherry and R. Morris, +.I +BC \- An arbitrary precision desk-calculator language +.SH BUGS +No &&, \(or\|\(or, or ! operators. +.br +.I For +statement must have all three E's. +.br +.I Quit +is interpreted when read, not when executed. diff --git a/bc/bc.y b/bc/bc.y @@ -0,0 +1,743 @@ +%{ +/* from 4.4BSD /usr/src/usr.bin/bc/bc.y */ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This module is believed to contain source code proprietary to AT&T. + * Use and redistribution is subject to the Berkeley Software License + * Agreement and your Software Agreement with AT&T (Western Electric). + * + * from bc.y 8.1 (Berkeley) 6/6/93 + */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)bc.sl 1.24 (gritter) 7/3/05"; +#include <unistd.h> +#include <signal.h> +#include <limits.h> +#include <inttypes.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +typedef intptr_t YYSTYPE; +#define YYSTYPE YYSTYPE + static int cpeek(int c, int yes, int no); + static int getch(void); + static intptr_t bundle(int a, ...); + static void routput(intptr_t *p); + static void output(intptr_t *p); + static void conout(intptr_t p, intptr_t s); + static void pp(intptr_t); + static void tp(intptr_t); + static void yyinit(int argc, char *argv[]); + static intptr_t *getout(void); + static intptr_t *getf(intptr_t); + static intptr_t *geta(intptr_t); + static void yyerror(const char *); + static void cantopen(const char *); + extern int yylex(void); + +#if defined (__GLIBC__) && defined (_IO_getc_unlocked) +#undef getc +#define getc(f) _IO_getc_unlocked(f) +#endif +%} +%right '=' +%left '+' '-' +%left '*' '/' '%' +%right '^' +%left UMINUS + +%term LETTER DIGIT SQRT LENGTH _IF FFF EQ +%term _WHILE _FOR NE LE GE INCR DECR +%term _RETURN _BREAK _DEFINE BASE OBASE SCALE +%term EQPL EQMI EQMUL EQDIV EQREM EQEXP +%term _AUTO DOT +%term QSTR + +%{ +#define THIS_BC_STRING_MAX 1000 +static FILE *in; +static char cary[LINE_MAX + 1], *cp = { cary }; +static char string[THIS_BC_STRING_MAX + 3], *str = {string}; +static int crs = '0'; +static int rcrs = '0'; /* reset crs */ +static int bindx = 0; +static int lev = 0; +static int ln; +static char *ss; +static int bstack[10] = { 0 }; +static char *numb[15] = { + " 0", " 1", " 2", " 3", " 4", " 5", + " 6", " 7", " 8", " 9", " 10", " 11", + " 12", " 13", " 14" }; +static intptr_t *pre, *post; +%} +%% +start : + | start stat tail + { output( (intptr_t *)$2 );} + | start def dargs ')' '{' dlist slist '}' 
+ { bundle( 6,pre, $7, post ,"0",numb[lev],"Q"); + conout( $$, $2 ); + rcrs = crs; + output( (intptr_t *)"" ); + lev = bindx = 0; + } + ; + +dlist : tail + | dlist _AUTO dlets tail + ; + +stat : e + { bundle(2, $1, "ps." ); } + | + { bundle(1, "" ); } + | QSTR + { bundle(3,"[",$1,"]P");} + | LETTER '=' e + { bundle(3, $3, "s", $1 ); } + | LETTER '[' e ']' '=' e + { bundle(4, $6, $3, ":", geta($1)); } + | LETTER EQOP e + { bundle(6, "l", $1, $3, $2, "s", $1 ); } + | LETTER '[' e ']' EQOP e + { bundle(8,$3, ";", geta($1), $6, $5, $3, ":", geta($1));} + | _BREAK + { bundle(2, numb[lev-bstack[bindx-1]], "Q" ); } + | _RETURN '(' e ')' + { bundle(4, $3, post, numb[lev], "Q" ); } + | _RETURN '(' ')' + { bundle(4, "0", post, numb[lev], "Q" ); } + | _RETURN + { bundle(4,"0",post,numb[lev],"Q"); } + | SCALE '=' e + { bundle(2, $3, "k"); } + | SCALE EQOP e + { bundle(4,"K",$3,$2,"k"); } + | BASE '=' e + { bundle(2,$3, "i"); } + | BASE EQOP e + { bundle(4,"I",$3,$2,"i"); } + | OBASE '=' e + { bundle(2,$3,"o"); } + | OBASE EQOP e + { bundle(4,"O",$3,$2,"o"); } + | '{' slist '}' + { $$ = $2; } + | FFF + { bundle(1,"fY"); } + | error + { bundle(1,"c"); } + | _IF CRS BLEV '(' re ')' stat + { conout( $7, $2 ); + bundle(3, $5, $2, " " ); + } + | _WHILE CRS '(' re ')' stat BLEV + { bundle(3, $6, $4, $2 ); + conout( $$, $2 ); + bundle(3, $4, $2, " " ); + } + | fprefix CRS re ';' e ')' stat BLEV + { bundle(5, $7, $5, "s.", $3, $2 ); + conout( $$, $2 ); + bundle(5, $1, "s.", $3, $2, " " ); + } + | '~' LETTER '=' e + { bundle(3,$4,"S",$2); } + ; + +EQOP : EQPL + { $$ = (intptr_t)"+"; } + | EQMI + { $$ = (intptr_t)"-"; } + | EQMUL + { $$ = (intptr_t)"*"; } + | EQDIV + { $$ = (intptr_t)"/"; } + | EQREM + { $$ = (intptr_t)"%%"; } + | EQEXP + { $$ = (intptr_t)"^"; } + ; + +fprefix : _FOR '(' e ';' + { $$ = $3; } + ; + +BLEV : + { --bindx; } + ; + +slist : stat + | slist tail stat + { bundle(2, $1, $3 ); } + ; + +tail : '\n' + {ln++;} + | ';' + ; + +re : e EQ e + { bundle(3, $1, $3, "=" ); } 
+ | e '<' e + { bundle(3, $1, $3, ">" ); } + | e '>' e + { bundle(3, $1, $3, "<" ); } + | e NE e + { bundle(3, $1, $3, "!=" ); } + | e GE e + { bundle(3, $1, $3, "!>" ); } + | e LE e + { bundle(3, $1, $3, "!<" ); } + | e + { bundle(2, $1, " 0!=" ); } + ; + +e : e '+' e + { bundle(3, $1, $3, "+" ); } + | e '-' e + { bundle(3, $1, $3, "-" ); } + | '-' e %prec UMINUS + { bundle(3, " 0", $2, "-" ); } + | e '*' e + { bundle(3, $1, $3, "*" ); } + | e '/' e + { bundle(3, $1, $3, "/" ); } + | e '%' e + { bundle(3, $1, $3, "%%" ); } + | e '^' e + { bundle(3, $1, $3, "^" ); } + | LETTER '[' e ']' + { bundle(3,$3, ";", geta($1)); } + | LETTER INCR + { bundle(4, "l", $1, "d1+s", $1 ); } + | INCR LETTER + { bundle(4, "l", $2, "1+ds", $2 ); } + | DECR LETTER + { bundle(4, "l", $2, "1-ds", $2 ); } + | LETTER DECR + { bundle(4, "l", $1, "d1-s", $1 ); } + | LETTER '[' e ']' INCR + { bundle(7,$3,";",geta($1),"d1+",$3,":",geta($1)); } + | INCR LETTER '[' e ']' + { bundle(7,$4,";",geta($2),"1+d",$4,":",geta($2)); } + | LETTER '[' e ']' DECR + { bundle(7,$3,";",geta($1),"d1-",$3,":",geta($1)); } + | DECR LETTER '[' e ']' + { bundle(7,$4,";",geta($2),"1-d",$4,":",geta($2)); } + | SCALE INCR + { bundle(1,"Kd1+k"); } + | INCR SCALE + { bundle(1,"K1+dk"); } + | SCALE DECR + { bundle(1,"Kd1-k"); } + | DECR SCALE + { bundle(1,"K1-dk"); } + | BASE INCR + { bundle(1,"Id1+i"); } + | INCR BASE + { bundle(1,"I1+di"); } + | BASE DECR + { bundle(1,"Id1-i"); } + | DECR BASE + { bundle(1,"I1-di"); } + | OBASE INCR + { bundle(1,"Od1+o"); } + | INCR OBASE + { bundle(1,"O1+do"); } + | OBASE DECR + { bundle(1,"Od1-o"); } + | DECR OBASE + { bundle(1,"O1-do"); } + | LETTER '(' cargs ')' + { bundle(4, $3, "l", getf($1), "x" ); } + | LETTER '(' ')' + { bundle(3, "l", getf($1), "x" ); } + | cons + { bundle(2, " ", $1 ); } + | DOT cons + { bundle(2, " .", $2 ); } + | cons DOT cons + { bundle(4, " ", $1, ".", $3 ); } + | cons DOT + { bundle(3, " ", $1, "." 
); } + | DOT + { $$ = (intptr_t)"l."; } + | LETTER + { bundle(2, "l", $1 ); } + | LETTER '=' e + { bundle(3, $3, "ds", $1 ); } + | LETTER EQOP e %prec '=' + { bundle(6, "l", $1, $3, $2, "ds", $1 ); } + | LETTER '[' e ']' '=' e + { bundle(5,$6,"d",$3,":",geta($1)); } + | LETTER '[' e ']' EQOP e + { bundle(9,$3,";",geta($1),$6,$5,"d",$3,":",geta($1)); } + | LENGTH '(' e ')' + { bundle(2,$3,"Z"); } + | SCALE '(' e ')' + { bundle(2,$3,"X"); } /* must be before '(' e ')' */ + | '(' e ')' + { $$ = $2; } + | '?' + { bundle(1, "?" ); } + | SQRT '(' e ')' + { bundle(2, $3, "v" ); } + | '~' LETTER + { bundle(2,"L",$2); } + | SCALE '=' e + { bundle(2,$3,"dk"); } + | SCALE EQOP e %prec '=' + { bundle(4,"K",$3,$2,"dk"); } + | BASE '=' e + { bundle(2,$3,"di"); } + | BASE EQOP e %prec '=' + { bundle(4,"I",$3,$2,"di"); } + | OBASE '=' e + { bundle(2,$3,"do"); } + | OBASE EQOP e %prec '=' + { bundle(4,"O",$3,$2,"do"); } + | SCALE + { bundle(1,"K"); } + | BASE + { bundle(1,"I"); } + | OBASE + { bundle(1,"O"); } + ; + +cargs : eora + | cargs ',' eora + { bundle(2, $1, $3 ); } + ; +eora: e + | LETTER '[' ']' + {bundle(2,"l",geta($1)); } + ; + +cons : constant + { *cp++ = '\0'; } + +constant: + '_' + { $$ = (intptr_t)cp; *cp++ = '_'; } + | DIGIT + { $$ = (intptr_t)cp; *cp++ = $1; } + | constant DIGIT + { *cp++ = $2; } + ; + +CRS : + { $$ = (intptr_t)cp; *cp++ = crs++; *cp++ = '\0'; + if(crs == '[')crs+=3; + if(crs == 'a')crs='{'; + if(crs >= 0241){yyerror("program too big"); + getout(); + } + bstack[bindx++] = lev++; } + ; + +def : _DEFINE LETTER '(' + { $$ = (intptr_t)getf($2); + pre = (intptr_t *)""; + post = (intptr_t *)""; + lev = 1; + bstack[bindx=0] = 0; + } + ; + +dargs : + | lora + { pp( $1 ); } + | dargs ',' lora + { pp( $3 ); } + ; + +dlets : lora + { tp($1); } + | dlets ',' lora + { tp($3); } + ; +lora : LETTER + | LETTER '[' ']' + { $$ = (intptr_t)geta($1); } + ; + +%% +# define error 256 + +static int peekc = -1; +static int sargc; +static int ifile; +static char **sargv; 
+ +static char funtab[52] = { + 01,0,02,0,03,0,04,0,05,0,06,0,07,0,010,0,011,0,012,0,013,0,014,0,015,0,016,0,017,0, + 020,0,021,0,022,0,023,0,024,0,025,0,026,0,027,0,030,0,031,0,032,0 }; +static char atab[52] = { + 0241,0,0242,0,0243,0,0244,0,0245,0,0246,0,0247,0,0250,0,0251,0,0252,0,0253,0, + 0254,0,0255,0,0256,0,0257,0,0260,0,0261,0,0262,0,0263,0,0264,0,0265,0,0266,0, + 0267,0,0270,0,0271,0,0272,0}; +static char *letr[26] = { + "a","b","c","d","e","f","g","h","i","j", + "k","l","m","n","o","p","q","r","s","t", + "u","v","w","x","y","z" } ; +/*static char *dot = { "." };*/ + +int +yylex(void){ + int c, ch; +restart: + c = getch(); + peekc = -1; + while( c == ' ' || c == '\t' ) c = getch(); + if(c == '\\'){ + getch(); + goto restart; + } + if( c<= 'z' && c >= 'a' ) { + /* look ahead to look for reserved words */ + peekc = getch(); + if( peekc >= 'a' && peekc <= 'z' ){ /* must be reserved word */ + if( c=='i' && peekc=='f' ){ c=_IF; goto skip; } + if( c=='w' && peekc=='h' ){ c=_WHILE; goto skip; } + if( c=='f' && peekc=='o' ){ c=_FOR; goto skip; } + if( c=='s' && peekc=='q' ){ c=SQRT; goto skip; } + if( c=='r' && peekc=='e' ){ c=_RETURN; goto skip; } + if( c=='b' && peekc=='r' ){ c=_BREAK; goto skip; } + if( c=='d' && peekc=='e' ){ c=_DEFINE; goto skip; } + if( c=='s' && peekc=='c' ){ c= SCALE; goto skip; } + if( c=='b' && peekc=='a' ){ c=BASE; goto skip; } + if( c=='i' && peekc == 'b'){ c=BASE; goto skip; } + if( c=='o' && peekc=='b' ){ c=OBASE; goto skip; } + if( c=='d' && peekc=='i' ){ c=FFF; goto skip; } + if( c=='a' && peekc=='u' ){ c=_AUTO; goto skip; } + if( c == 'l' && peekc=='e'){ c=LENGTH; goto skip; } + if( c == 'q' && peekc == 'u'){getout();} + /* could not be found */ + return( error ); + skip: /* skip over rest of word */ + peekc = -1; + while( (ch = getch()) >= 'a' && ch <= 'z' ); + peekc = ch; + return( c ); + } + + /* usual case; just one single letter */ + + yylval = (intptr_t)letr[c-'a']; + return( LETTER ); + } + if( c>= '0' && c <= '9' || c>= 
'A' && c<= 'F' ){ + yylval = c; + return( DIGIT ); + } + switch( c ){ + case '.': return( DOT ); + case '=': + switch( peekc = getch() ){ + case '=': c=EQ; goto gotit; + case '+': c=EQPL; goto gotit; + case '-': c=EQMI; goto gotit; + case '*': c=EQMUL; goto gotit; + case '/': c=EQDIV; goto gotit; + case '%': c=EQREM; goto gotit; + case '^': c=EQEXP; goto gotit; + default: return( '=' ); + gotit: peekc = -1; return(c); + } + case '+': return( cpeek( '+', INCR, cpeek( '=', EQPL, '+') ) ); + case '-': return( cpeek( '-', DECR, cpeek( '=', EQMI, '-') ) ) ; + case '<': return( cpeek( '=', LE, '<' ) ); + case '>': return( cpeek( '=', GE, '>' ) ); + case '!': return( cpeek( '=', NE, '!' ) ); + case '/': + if((peekc = getch()) == '*'){ + peekc = -1; + while((getch() != '*') || ((peekc = getch()) != '/')); + peekc = -1; + goto restart; + } + else if (peekc == '=') { + c=EQDIV; + goto gotit; + } + else return(c); + case '*': + return( cpeek( '=', EQMUL, '*' ) ); + case '%': + return( cpeek( '=', EQREM, '%' ) ); + case '^': + return( cpeek( '=', EQEXP, '^' ) ); + case '"': + yylval = (intptr_t)str; + while((c=getch()) != '"'){*str++ = c; + if(str >= &string[sizeof string - 1]){yyerror("string space exceeded"); + getout(); + } + } + *str++ = '\0'; + return(QSTR); + default: return( c ); + } +} + +static int +cpeek(int c, int yes, int no){ + if( (peekc=getch()) != c ) return( no ); + else { + peekc = -1; + return( yes ); + } +} + +static int +getch(void){ + int ch; +loop: + ch = (peekc < 0) ? 
getc(in) : peekc; + peekc = -1; + if(ch != EOF)return(ch); + if(++ifile > sargc){ + if(ifile >= sargc+2)getout(); + in = stdin; + ln = 0; + goto loop; + } + fclose(in); + if((in = fopen(sargv[ifile],"r")) != NULL){ + ln = 0; + ss = sargv[ifile]; + goto loop; + } + cantopen(sargv[ifile]); + return EOF; +} +# define b_sp_max 3000 +static intptr_t b_space [ b_sp_max ]; +static intptr_t * b_sp_nxt = { b_space }; + +static int bdebug = 0; + +static intptr_t +bundle(int a, ...){ + intptr_t i, *q; + va_list ap; + + i = a; + q = b_sp_nxt; + if( bdebug ) printf("bundle %ld elements at %lo\n",(long)i, (long)q ); + va_start(ap, a); + while(i-- > 0){ + if( b_sp_nxt >= & b_space[b_sp_max] ) yyerror( "bundling space exceeded" ); + * b_sp_nxt++ = va_arg(ap, intptr_t); + } + va_end(ap); + * b_sp_nxt++ = 0; + yyval = (intptr_t)q; + return( (intptr_t)q ); +} + +static void +routput(intptr_t *p) { + if( bdebug ) printf("routput(%lo)\n", (long)p ); + if( p >= &b_space[0] && p < &b_space[b_sp_max]){ + /* part of a bundle */ + while( *p != 0 ) routput( (intptr_t *)*p++ ); + } + else printf( (char *)p ); /* character string */ +} + +static void +output(intptr_t *p) { + routput( p ); + b_sp_nxt = & b_space[0]; + printf( "\n" ); + fflush(stdout); + cp = cary; + crs = rcrs; +} + +static void +conout(intptr_t p, intptr_t s) { + printf("["); + routput( (intptr_t *)p ); + printf("]s%s\n", (char *)s ); + fflush(stdout); + lev--; +} + +static void +yyerror(const char *s) { + if(ifile > sargc)ss="teletype"; + fprintf(stderr, "%s on line %d, %s\n", + s ,ss?ln+1:0,ss?ss:"command line"); + cp = cary; + crs = rcrs; + bindx = 0; + lev = 0; + b_sp_nxt = &b_space[0]; +} + +static void +cantopen(const char *fn) +{ + char spc[280]; + char *oss = ss; + + ss = 0; + snprintf(spc, sizeof spc, "can't open input file %s", fn); + yyerror(spc); + ss = oss; +} + +static void +pp(intptr_t s) { + /* puts the relevant stuff on pre and post for the letter s */ + + bundle(3, "S", s, pre ); + pre = (intptr_t *)yyval; + 
bundle(4, post, "L", s, "s." ); + post = (intptr_t *)yyval; +} + +static void +tp(intptr_t s) { /* same as pp, but for temps */ + bundle(3, "0S", s, pre ); + pre = (intptr_t *)yyval; + bundle(4, post, "L", s, "s." ); + post = (intptr_t *)yyval; +} + +static void +yyinit(int argc,char **argv) { + signal(SIGINT, SIG_IGN); + sargv=argv; + sargc= -- argc; + if(sargc == 0)in=stdin; + else if((in = fopen(sargv[1],"r")) == NULL) { + cantopen(sargv[1]); + exit(0); + } + ifile = 1; + ln = 0; + ss = sargv[1]; +} + +static intptr_t * +getout(void){ + printf("q"); + fflush(stdout); + exit(0); + /*NOTREACHED*/ + return(NULL); +} + +static intptr_t * +getf(intptr_t p) { + return(intptr_t *)(&funtab[2*(*((char *)p) -0141)]); +} + +static intptr_t * +geta(intptr_t p) { + return(intptr_t *)(&atab[2*(*((char *)p) - 0141)]); +} + +int +main(int argc, char **argv) +{ + extern int yyparse(void); + const char optstring[] = "cdl"; + int p[2]; + int i; + int cflag = 0, lflag = 0; + + +#ifdef __GLIBC__ + putenv("POSIXLY_CORRECT=1"); +#endif + while ((i = getopt(argc, argv, optstring)) != EOF) { + switch (i) { + case 'd': + case 'c': + cflag = 1; + break; + case 'l': + lflag = 1; + break; + default: + exit(2); + } + } + argv += optind - 1, argc -= optind - 1; + if (cflag) { + yyinit(argc, argv); + yyparse(); + exit(0); + } + if (lflag) { + *argv-- = LIBB; + argc++; + } + pipe(p); + if (fork()==0) { + close(1); + dup(p[1]); + close(p[0]); + close(p[1]); + yyinit(argc, argv); + yyparse(); + exit(0); + } + close(0); + dup(p[0]); + close(p[0]); + close(p[1]); + execl(DC, "dc", "-", NULL); + execl("/usr/5bin/dc", "dc", "-", NULL); + execl("/usr/local/bin/dc", "dc", "-", NULL); + execl("/usr/contrib/bin/dc", "dc", "-", NULL); + execl("/usr/bin/dc", "dc", "-", NULL); + return(1); +} diff --git a/bc/lib.b b/bc/lib.b @@ -0,0 +1,241 @@ +/* from 4.4BSD /usr/src/usr.bin/bc/bc.library */ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * This module is believed to contain source code proprietary to AT&T. + * Use and redistribution is subject to the Berkeley Software License + * Agreement and your Software Agreement with AT&T (Western Electric). + * + * from bc.library 8.1 (Berkeley) 6/6/93 + */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Sccsid @(#)lib.b 1.4 (gritter) 8/26/02 */ + +scale = 20 +define e(x){ + auto a, b, c, d, e, g, t, w, y + + t = scale + scale = t + .434*x + 1 + + w = 0 + if(x<0){ + x = -x + w = 1 + } + y = 0 + while(x>2){ + x = x/2 + y = y + 1 + } + + a=1 + b=1 + c=b + d=1 + e=1 + for(a=1;1==1;a++){ + b=b*x + c=c*a+b + d=d*a + g = c/d + if(g == e){ + g = g/1 + while(y--){ + g = g*g + } + scale = t + if(w==1) return(1/g) + return(g/1) + } + e=g + } +} + +define l(x){ + auto a, b, c, d, e, f, g, u, s, t + if(x <=0) return(1-10^scale) + t = scale + + f=1 + scale = scale + scale(x) - length(x) + 1 + s=scale + while(x > 2){ + s = s + (length(x)-scale(x))/2 + 1 + if(s>0) scale = s + x = sqrt(x) + f=f*2 + } + while(x < .5){ + s = s + (length(x)-scale(x))/2 + 1 + if(s>0) scale = s + x = sqrt(x) + f=f*2 + } + + scale = t + length(f) - scale(f) + 1 + u = (x-1)/(x+1) + + scale = scale + 1.1*length(t) - 1.1*scale(t) + s = u*u + b = 2*f + c = b + d = 1 + e = 1 + for(a=3;1==1;a=a+2){ + b=b*s + c=c*a+d*b + d=d*a + g=c/d + if(g==e){ + scale = t + return(u*c/d) + } + e=g + } +} + +define s(x){ + auto a, b, c, s, t, y, p, n, i + t = scale + y = x/.7853 + s = t + length(y) - scale(y) + if(s<t) s=t + scale = s + p = a(1) + + scale = 0 + if(x>=0) n = (x/(2*p)+1)/2 + if(x<0) n = (x/(2*p)-1)/2 + x = x - 4*n*p + if(n%2!=0) x = -x + + scale = t + length(1.2*t) - scale(1.2*t) + y = -x*x + a = x + b = 1 + s = x + for(i=3; 1==1; i=i+2){ + a = a*y + b = b*i*(i-1) + c = a/b + if(c==0){scale=t; return(s/1)} + s = s+c + } 
+} + +define c(x){ + auto t + t = scale + scale = scale+1 + x = s(x+2*a(1)) + scale = t + return(x/1) +} + +define a(x){ + auto a, b, c, d, e, f, g, s, t + if(x==0) return(0) + if(x==1) { + if(scale<52) { +return(.7853981633974483096156608458198757210492923498437764/1) + } + } + t = scale + f=1 + while(x > .5){ + scale = scale + 1 + x= -(1-sqrt(1.+x*x))/x + f=f*2 + } + while(x < -.5){ + scale = scale + 1 + x = -(1-sqrt(1.+x*x))/x + f=f*2 + } + s = -x*x + b = f + c = f + d = 1 + e = 1 + for(a=3;1==1;a=a+2){ + b=b*s + c=c*a+d*b + d=d*a + g=c/d + if(g==e){ + scale = t + return(x*c/d) + } + e=g + } +} + +define j(n,x){ +auto a,b,c,d,e,g,i,s,k,t + + t = scale + k = 1.36*x + 1.16*t - n + k = length(k) - scale(k) + if(k>0) scale = scale + k + +s= -x*x/4 +if(n<0){ + n= -n + x= -x + } +a=1 +c=1 +for(i=1;i<=n;i++){ + a=a*x + c = c*2*i + } +b=a +d=1 +e=1 +for(i=1;1;i++){ + a=a*s + b=b*i*(n+i) + a + c=c*i*(n+i) + g=b/c + if(g==e){ + scale = t + return(g/1) + } + e=g + } +} diff --git a/bc/mkfile b/bc/mkfile @@ -0,0 +1,11 @@ +BIN = bc +OBJ = bc.o +LOCAL_CFLAGS = -DDC=\"$BINDIR/dc\" -DLIBB=\"$LIBDIR/lib.b\" +CLEAN_FILES = bc.c +INSTALL_BIN = bc +INSTALL_LIB = lib.b +INSTALL_MAN1 = bc.1 +DEPS = yacc + +<$mkbuild/mk.default + diff --git a/bc/yyval.sed b/bc/yyval.sed @@ -0,0 +1,22 @@ +# +# Sccsid @(#)yyval.sed 1.3 (gritter) 4/27/04 +# +# bison has a yacc-compatible yyval, but it is a local variable inside +# yyparse(). Making the variable global is necessary to make bc work +# with a bison-generated parser. +1,2 { + /Bison/ { + :look + /YYSTYPE/ { + a\ + YYSTYPE yyval; + :repl + s/^[ ]*YYSTYPE[ ]*yyval;// + n + t + b repl + } + n + b look + } +} diff --git a/cp/cp.1 b/cp/cp.1 @@ -0,0 +1,218 @@ +.\" +.\" Sccsid @(#)cp.1 1.26 (gritter) 5/3/05 +.\" Parts taken from cp(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. 
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+.TH CP 1 "5/3/05" "Heirloom Toolchest" "User Commands" +.SH NAME +cp \- copy files +.SH SYNOPSIS +\fBcp\fR +[\fB\-adDfiHLpPrRs\fR] [\fB\-b\ \fIsize\fR] +\fIfile1\fR [\fIfile2\fR .\ .\ .\ ] \fItarget\fR +.SH DESCRIPTION +.I File1 +is copied onto +.IR target . +If +.I target +is an existing regular file, +its content is overwritten. +Its mode and owner are preserved; +the mode of the source file is used otherwise. +.PP +If +.I target +is a directory, +one or more files are copied +into the directory with their original file-names. +.PP +.I Cp +refuses to copy a file onto itself. +.PP +The +.I cp +command accepts the following options: +.TP +.B \-i +.I Cp +will ask for confirmation +before overwriting an existing target file. +For +.B /usr/5bin/cp +and +.BR /usr/5bin/s42/cp , +.I cp +will also ask for confirmation before overwriting a directory +with the +.I \-r +or +.I \-R +option. +For +.BR /usr/5bin/cp , +this flag will be automatically disabled +if standard input is not a terminal. +.TP +.B \-p +.I Cp +will try to preserve access and modification times, +user and group ownerships, +and file permission bits. +Failing to preserve these modes +is always considered an error, +but only +.BR /usr/5bin/s42/cp , +.BR /usr/5bin/posix2001/cp , +and +.B /usr/5bin/posix/cp +will print an error message. +.TP +.B \-r +The source file operands may be directories +that will be copied recursively. +Symbolic links are followed. +The content of all non-directory files encountered +is tried to be reproduced in a regular file. +.PP +The following options have been added by POSIX.2: +.TP +.B \-f +If overwriting a target file fails, +.I cp +will try to unlink that file and proceed. +.TP +.B \-R +The source file operands may be directories +that will be copied recursively. +Special files +such as block and character devices, +FIFOs, +and symbolic links encountered during traversal +are recreated in the target hierarchy. +If a symbolic link is given as an operand, +its target is copied. 
+.PP +The following options have been added by POSIX.1-2001: +.TP +.B \-H +With the +.I \-R +option, follow symbolic links given as operands, +but do not follow symbolic links encountered during traversal +of the source hierarchy. +This is the default. +.TP +.B \-L +With the +.I \-R +option, follow all symbolic links. +.TP +.B \-P +With the +.I \-R +option, do not follow any symbolic links. +.PP +The following options are extensions: +.TP +.B \-a +Perform a recursive copy and, if possible, +preserve hard links as well as any attributes. +This is the same as the combination of the +.I \-Rdp +options. +.TP +\fB\-b\ \fIsize\fR +With this option given, +.I cp +performs input and output in units of +.I size +bytes. +The default size depends on the current input file. +.TP +.B \-d +With the +.I \-r +or +.I \-R +options, +hard links between copied files are usually splitted, +i.\|e. each copied file is assigned to a separate i-node. +When this option is given, +hard links between copied files +are reproduced in the destination hierarchy. +.TP +.B \-D +Causes +.I cp +to use direct i/o +when copying file data. +See the description of `O_DIRECT' in +.IR open (2) +for more information. +.TP +.B \-s +With this option, +.I cp +prints i/o statistics for each single file +of which data was copied. +.SH "SEE ALSO" +cat(1), +cpio(1), +mv(1), +pr(1), +rm(1) +.SH NOTES +Use either +.RS +.sp +.B cp +.B \-\- +.I \-file +.I target +.sp +.RE +or +.RS +.sp +.B cp +.I ./\-file +.I target +.sp +.RE +to copy files that begin with a hyphen character. +.PP +A copy of a symbolic link +contains the same pathname as the original. +Symbolic links with relative pathnames +may thus change or lose their target +if copied to a different level in the file hierarchy. diff --git a/cp/cp.c b/cp/cp.c @@ -0,0 +1,1264 @@ +/* + * cp - copy files + * + * Gunnar Ritter, Freiburg i. Br., Germany, July 2002. 
+ */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +#if defined (SUS) +static const char sccsid[] USED = "@(#)cp_sus.sl 1.84 (gritter) 3/4/06"; +#elif defined (S42) +static const char sccsid[] USED = "@(#)cp_s42.sl 1.84 (gritter) 3/4/06"; +#else +static const char sccsid[] USED = "@(#)cp.sl 1.84 (gritter) 3/4/06"; +#endif + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <malloc.h> +#include <errno.h> +#include <libgen.h> +#include <limits.h> +#include <dirent.h> +#include <utime.h> +#include "sfile.h" +#include "memalign.h" +#include "alloca.h" + +#ifndef S_IFDOOR +#define S_IFDOOR 0xD000 /* Solaris door */ +#endif +#ifndef S_IFNAM +#define S_IFNAM 0x5000 /* XENIX special named file */ +#endif +#ifndef S_IFNWK 
+#define S_IFNWK 0x9000 /* HP-UX network special file */ +#endif + +static enum { + PERS_CP, + PERS_MV, + PERS_LN +} pers; + +enum okay { + OKAY = 0, + STOP = 1 +}; + +struct islot { + struct islot *i_lln; + struct islot *i_rln; + char *i_name; + ino_t i_ino; +}; + +struct dslot { + struct dslot *d_nxt; + struct islot *d_isl; + dev_t d_dev; +}; + +static struct dslot *d0; + +static unsigned errcnt; /* count of errors */ +static long bflag; /* buffer size */ +static int dflag; /* preserve hard links */ +#ifdef O_DIRECT +static int Dflag; /* use direct i/o */ +#endif /* O_DIRECT */ +static int fflag; /* force */ +static int iflag; /* ask before overwriting */ +static int nflag; /* ln: do not remove links */ +static int pflag; /* preserve owner and times */ +static int rflag; /* recursive, read FIFOs */ +static int Rflag; /* recursive, recreate FIFOs */ +static int sflag; /* make symlinks / show statistics */ +static int HLPflag; /* -H, -L, or -P */ +static int ontty; /* stdin is a terminal */ +static mode_t umsk; /* current umask */ +static uid_t myuid; /* current uid */ +static gid_t mygid; /* current gid */ +static char *progname; /* argv[0] to main() */ +static struct islot *inull; /* inode tree null element */ +static void (*go)(const char *, const char *, struct stat *, int, + int (*statfn)(const char *, struct stat *)); + +static mode_t +check_suid(const struct stat *sp, mode_t mode) +{ + if (sp->st_uid != myuid || sp->st_gid != mygid) { + mode &= ~(mode_t)S_ISUID; + if ((sp->st_mode&S_IFMT) != S_IFDIR || sp->st_mode&0010) + mode &= ~(mode_t)S_ISGID; + if ((sp->st_mode&S_IFMT) == S_IFDIR || sp->st_gid != mygid) + mode &= ~(mode_t)S_ISGID; + } + return mode; +} + +static void +nomem(void) +{ + write(2, progname, strlen(progname)); + write(2, ": Insufficient memory space.\n", 29); + _exit(077); +} + +static void * +srealloc(void *vp, size_t nbytes) +{ + void *p; + + if ((p = realloc(vp, nbytes)) == NULL) + nomem(); + return p; +} + +static void * +smalloc(size_t 
nbytes) +{ + return srealloc(NULL, nbytes); +} + +static void * +scalloc(size_t nelem, size_t nbytes) +{ + void *p; + + if ((p = calloc(nelem, nbytes)) == NULL) + nomem(); + return p; +} + +static void +usage(void) +{ + switch (pers) { + case PERS_CP: + fprintf(stderr, "\ +Usage: %s [-i] [-p] f1 f2\n\ + %s [-i] [-p] f1 ... fn d1\n\ + %s [-i] [-p] [-r] d1 d2\n", + progname, progname, progname); + break; + case PERS_MV: + fprintf(stderr, "\ +Usage: %s [-f] [-i] f1 f2\n\ + %s [-f] [-i] f1 ... fn d1\n\ + %s [-f] [-i] d1 d2\n", + progname, progname, progname); + break; + case PERS_LN: + { +#if defined (SUS) + const char nstr[] = ""; +#else /* !SUS */ + const char nstr[] = "[-n] "; +#endif /* !SUS */ + fprintf(stderr, "\ +Usage: %s [-f] %s[-s] f1 f2\n\ + %s [-f] %s[-s] f1 ... fn d1\n\ + %s [-f] %s[-s] d1 d2\n", + progname, nstr, progname, nstr, + progname, nstr); + } + break; + } + exit(2); +} + +static void +freeislots(struct islot *ip) +{ + if (ip == inull) + return; + freeislots(ip->i_lln); + freeislots(ip->i_rln); + free(ip->i_name); + free(ip); +} + +static void +freedslots(void) +{ + struct dslot *dp, *dn; + + for (dp = d0; dp; dp = dn) { + dn = dp->d_nxt; + freeislots(dp->d_isl); + free(dp); + } + d0 = NULL; +} + +static struct islot * +isplay(ino_t ino, struct islot *x) +{ + struct islot hdr; + struct islot *leftmax, *rightmin; + struct islot *y; + + hdr.i_lln = hdr.i_rln = inull; + leftmax = rightmin = &hdr; + inull->i_ino = ino; + while (ino != x->i_ino) { + if (ino < x->i_ino) { + if (ino < x->i_lln->i_ino) { + y = x->i_lln; + x->i_lln = y->i_rln; + y->i_rln = x; + x = y; + } + if (x->i_lln == inull) + break; + rightmin->i_lln = x; + rightmin = x; + x = x->i_lln; + } else { + if (ino > x->i_rln->i_ino) { + y = x->i_rln; + x->i_rln = y->i_lln; + y->i_lln = x; + x = y; + } + if (x->i_rln == inull) + break; + leftmax->i_rln = x; + leftmax = x; + x = x->i_rln; + } + } + leftmax->i_rln = x->i_lln; + rightmin->i_lln = x->i_rln; + x->i_lln = hdr.i_rln; + x->i_rln = 
hdr.i_lln; + inull->i_ino = !ino; + return x; +} + +static struct islot * +ifind(ino_t ino, struct islot **it) +{ + if (*it == NULL) + return NULL; + *it = isplay(ino, *it); + return (*it)->i_ino == ino ? *it : NULL; +} + +static void +iput(struct islot *ik, struct islot **it) +{ + if ((*it) == NULL) { + ik->i_lln = ik->i_rln = inull; + (*it) = ik; + } else { + /*(*it) = isplay(ik->i_ino, (*it));*/ + /* ifind() is always called before */ + if (ik->i_ino < (*it)->i_ino) { + ik->i_lln = (*it)->i_lln; + ik->i_rln = (*it); + (*it)->i_lln = inull; + (*it) = ik; + } else if ((*it)->i_ino < ik->i_ino) { + ik->i_rln = (*it)->i_rln; + ik->i_lln = (*it); + (*it)->i_rln = inull; + (*it) = ik; + } + } +} +static int +canlink(const char *path, const struct stat *sp) +{ + struct dslot *ds, *dp; + struct islot *ip; + + for (ds = d0, dp = NULL; ds; dp = ds, ds = ds->d_nxt) + if (ds->d_dev == sp->st_dev) + break; + if (ds == NULL) { + ds = scalloc(1, sizeof *ds); + ds->d_dev = sp->st_dev; + if (d0 == NULL) + d0 = ds; + else + dp->d_nxt = ds; + } + if ((ip = ifind(sp->st_ino, &ds->d_isl)) == NULL) { + ip = scalloc(1, sizeof *ip); + ip->i_name = smalloc(strlen(path) + 1); + strcpy(ip->i_name, path); + ip->i_ino = sp->st_ino; + iput(ip, &ds->d_isl); + } else { + if (link(ip->i_name, path) == 0) + return 1; + } + return 0; +} + +static enum okay +confirm(void) +{ + enum okay yes = STOP; + char c; + + if (read(0, &c, 1) == 1) { + yes = (c == 'y' || c == 'Y') ? 
OKAY : STOP; + while (c != '\n' && read(0, &c, 1) == 1); + } + return yes; +} + +static void +permissions(const char *path, const struct stat *ssp) +{ + mode_t mode; + + mode = ssp->st_mode & 07777; + if (pflag) { + struct utimbuf ut; + ut.actime = ssp->st_atime; + ut.modtime = ssp->st_mtime; + if (utime(path, &ut) < 0) { +#if defined (SUS) || defined (S42) + fprintf(stderr, "%s: cannot set times for %s\n%s: %s\n", + progname, path, + progname, strerror(errno)); +#endif /* SUS || S42 */ + if (pers != PERS_MV) + errcnt |= 010; + } + if (myuid == 0) { + if (chown(path, ssp->st_uid, ssp->st_gid) < 0) { +#if defined (SUS) || defined (S42) + fprintf(stderr, + "%s: cannot change owner and group of %s\n%s: %s\n", + progname, path, + progname, strerror(errno)); +#endif /* SUS || S42 */ + if (pers != PERS_MV) + errcnt |= 010; + mode &= ~(mode_t)(S_ISUID|S_ISGID); + } + } else + mode = check_suid(ssp, mode); + } else + mode = check_suid(ssp, mode & ~umsk); + if (chmod(path, mode) < 0) { +#if defined (SUS) || defined (S42) + fprintf(stderr, "%s: cannot set permissions for %s\n%s: %s\n", + progname, path, + progname, strerror(errno)); +#endif /* SUS || S42 */ + if (pers != PERS_MV) + errcnt |= 010; + } +} + +static size_t +balign(const struct stat *ssp, const struct stat *dsp, + long long size, size_t prefd) +{ + int n, m; + size_t s; + + n = (ssp->st_mode&S_IFMT) == S_IFREG && ssp->st_blksize >= 0 ? + ssp->st_blksize : 512; + m = (dsp->st_mode&S_IFMT) == S_IFREG && dsp->st_blksize >= 0 ? 
+ dsp->st_blksize : 512; + if (prefd <= size && prefd % n == 0 && prefd % m == 0) + return prefd; + else if (n % m == 0) + return n; + else if (m % n == 0) + return m; + else { + s = n; + while (s % m) + s *= 2; + return s; + } +} + +/*ARGSUSED*/ +void +writerr(void *vp, int count, int written) +{ +} + +static long long +fdcopy(const char *src, const struct stat *ssp, const int sfd, + const char *tgt, const struct stat *dsp, const int dfd) +{ + static long pagesize; + static char *buf = NULL; + static size_t bufsize; + ssize_t rsz, wo, wt; + size_t blksize; + long long copied = 0; +#ifdef O_DIRECT + int sfl = 0, dfl = 0, haverest = 0, dioen = 0; + off_t remsz = 0; +#endif + +#ifdef __linux__ + if (!bflag && !Dflag && ssp->st_size > 0) { + long long sent; + + if ((sent = sfile(dfd, sfd, ssp->st_mode, ssp->st_size)) == + ssp->st_size) + return sent; + if (sent < 0) + goto err; + } +#endif /* __linux__ */ + if (pagesize == 0) + if ((pagesize = sysconf(_SC_PAGESIZE)) < 0) + pagesize = 4096; + if (bflag) + blksize = bflag; +#ifdef O_DIRECT + else if (Dflag) + blksize = balign(ssp, dsp, ssp->st_size, 1048576); +#endif /* O_DIRECT */ + else + blksize = balign(ssp, dsp, ssp->st_size, 4096); + if (blksize > bufsize) { + if (buf) + free(buf); + if ((buf = memalign(pagesize, bufsize = blksize)) == 0) + nomem(); + } +#ifdef O_DIRECT + if (Dflag) { + if ((ssp->st_mode&S_IFMT) == S_IFREG && + ssp->st_size > blksize || + (ssp->st_mode&S_IFMT) == S_IFBLK) { + sfl = fcntl(sfd, F_GETFL); + fcntl(sfd, F_SETFL, sfl | O_DIRECT); + remsz = ssp->st_size; + } + if ((dsp->st_mode&S_IFMT) == S_IFREG || + (dsp->st_mode&S_IFMT) == S_IFBLK) { + dfl = fcntl(dfd, F_GETFL); + fcntl(dfd, F_SETFL, dfl | O_DIRECT); + dioen = 1; + } + } +#endif /* O_DIRECT */ + while ((rsz = read(sfd, buf, blksize)) > 0) { +#ifdef O_DIRECT + if (Dflag && rsz < blksize && dioen != 0) { + fcntl(dfd, F_SETFL, dfl); + dioen = 0; + } +#endif /* O_DIRECT */ + wt = 0; + do { + if ((wo = write(dfd, buf + wt, rsz - wt)) < 0) 
{ +#ifdef __linux__ + err: +#endif /* __linux__ */ + fprintf(stderr, "%s: %s: write: %s\n", + progname, tgt, + strerror(errno)); + errcnt |= 04; +#ifdef notdef + if ((dsp->st_mode&S_IFMT) == S_IFREG) + unlink(tgt); +#endif /* notdef */ + return copied; + } + wt += wo; + copied += wo; + } while (wt < rsz); +#ifdef O_DIRECT + if (Dflag && ssp->st_size > blksize && + (ssp->st_mode&S_IFMT) == S_IFREG) { + remsz -= rsz; + if (remsz > 0 && remsz < blksize && + haverest == 0 && (bflag || + remsz<(blksize=balign(ssp, dsp, + ssp->st_size, 4096)))) { + fcntl(sfd, F_SETFL, sfl); + haverest = 1; + } + } +#endif /* O_DIRECT */ + } + if (rsz < 0) { + fprintf(stderr, "%s: %s: read: %s\n", + progname, src, strerror(errno)); + errcnt |= 04; +#ifdef notdef + if ((dsp->st_mode&S_IFMT) == S_IFREG) + unlink(tgt); +#endif /* notdef */ + } +#ifdef O_DIRECT + if (haverest) { +#if !defined (__FreeBSD__) && !defined (__DragonFly__) && !defined (__APPLE__) + fdatasync(dfd); +#else /* __FreeBSD__, __DragonFly__, __APPLE__ */ + fsync(dfd); +#endif /* __FreeBSD_, __DragonFly__, __APPLE__ */ + } +#endif /* O_DIRECT */ + return copied; +} + +static void +filecopy(const char *src, const struct stat *ssp, + const char *tgt, const struct stat *dsp) +{ + struct stat stbuf; + mode_t mode; + int sfd, dfd; + float f, s, t; + struct timeval tv1, tv2; + struct rusage ru1, ru2; + long long copied = 0; + + if (sflag) { + gettimeofday(&tv1, NULL); + getrusage(RUSAGE_SELF, &ru1); + } + if ((sfd = open(src, O_RDONLY)) < 0) { + fprintf(stderr, "%s: cannot open %s\n%s: %s\n", + progname, src, + src, strerror(errno)); + errcnt |= 01; + return; + } + mode = check_suid(ssp, ssp->st_mode & 07777); + if ((dfd = creat(tgt, mode)) < 0) + if (pers != PERS_MV && dsp != NULL && fflag && unlink(tgt) == 0) + dfd = creat(tgt, mode); + if (dfd < 0) { + fprintf(stderr, "%s: cannot create %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); + errcnt |= 01; + goto end1; + } + if (fstat(dfd, &stbuf) < 0) { + 
fprintf(stderr, "%s: fstat for %s failed: %s\n", + progname, tgt, strerror(errno)); + errcnt |= 04; + goto end2; + } + copied = fdcopy(src, ssp, sfd, tgt, &stbuf, dfd); +end2: + if (pflag) + permissions(tgt, ssp); + if (close(dfd) < 0) { + fprintf(stderr, "%s: close error on %s: %s\n", + progname, tgt, strerror(errno)); + errcnt |= 04; + } + if (sflag) { + gettimeofday(&tv2, NULL); + getrusage(RUSAGE_SELF, &ru2); +#define tv2f(tv) ((tv).tv_sec + (float)(tv).tv_usec / 1000000) + f = tv2f(tv2) - tv2f(tv1); + s = (float)copied / (2<<19); + t = f ? s / f : s; + printf(" ****** %s File Information ******\n" + " Input file : %s\n" + " Output file : %s\n" + " Real Time (secs) : %14.6f\n" + " User Time (secs) : %14.6f\n" + " System Time (secs) : %14.6f\n" + " File Size (MB) : %14.6f\n" + " Transfer Rate (MB/s) : %14.6f\n", + progname, src, tgt, + f, + tv2f(ru2.ru_utime) - tv2f(ru1.ru_utime), + tv2f(ru2.ru_stime) - tv2f(ru1.ru_stime), + s, + t); + } +end1: + close(sfd); +} + +static void +ignoring(const char *type, const char *path) +{ + fprintf(stderr, "%s: %signoring %s %s\n", progname, +#if defined (SUS) + "", +#else /* !SUS */ + "warning: ", +#endif /* !SUS */ + type, path); +#if defined (SUS) + if (pers == PERS_MV) + errcnt |= 020; +#endif /* SUS */ +} + +static enum okay +do_unlink(const char *tgt, const struct stat *dsp) +{ + if (dsp && unlink(tgt) < 0) { + fprintf(stderr, "%s: cannot unlink %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); + errcnt |= 01; + return STOP; + } + return OKAY; +} + +static void +devicecopy(const char *src, const struct stat *ssp, + const char *tgt, const struct stat *dsp) +{ + if (do_unlink(tgt, dsp) != OKAY) + return; + if (mknod(tgt, check_suid(ssp, ssp->st_mode & (07777|S_IFMT)), + ssp->st_rdev) < 0) { + fprintf(stderr, "%s: cannot create special file %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); + errcnt |= 01; + return; + } + if (pflag) + permissions(tgt, ssp); +} + +static void +symlinkcopy(const char 
*src, const struct stat *ssp, + const char *tgt, const struct stat *dsp) +{ + static char *buf; + static size_t bufsize; + ssize_t sz; + + if (buf == NULL) + buf = smalloc(bufsize = 256); + for (;;) { + sz = readlink(src, buf, bufsize - 1); + if (sz < 0) { + fprintf(stderr, + "%s: cannot read symbolic link %s\n%s: %s\n", + progname, src, + progname, strerror(errno)); + errcnt |= 01; + return; + } + if (sz == bufsize - 1) { + buf = srealloc(buf, bufsize += 256); + continue; + } + buf[sz] = '\0'; + break; + } + if (do_unlink(tgt, dsp) != OKAY) + return; + if (symlink(buf, tgt) < 0) { + fprintf(stderr, "%s: cannot create symbolic link %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); + errcnt |= 01; + return; + } + if (myuid == 0 && lchown(tgt, ssp->st_uid, ssp->st_gid) < 0) { +#if defined (SUS) + fprintf(stderr, + "%s: cannot change owner and group of %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); +#endif /* SUS */ + if (pers != PERS_MV) + errcnt |= 010; + } +} + +static void +socketcopy(const char *src, const struct stat *ssp, + const char *tgt, const struct stat *dsp) +{ + int fd, addrsz; + struct sockaddr_un addr; + size_t len; + + if (do_unlink(tgt, dsp) != OKAY) + return; + len = strlen(tgt); + memset(&addr, 0, sizeof addr); + addr.sun_family = AF_UNIX; + addrsz = sizeof addr - sizeof addr.sun_path + len; + if ((len >= sizeof addr.sun_path ? 
errno = ENAMETOOLONG, fd = -1, 1 : + (strncpy(addr.sun_path,tgt,sizeof addr.sun_path), 0)) || + (fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0 || + bind(fd, (struct sockaddr *)&addr, addrsz) < 0) { + fprintf(stderr, "%s: cannot create socket %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); + if (fd >= 0) + close(fd); + errcnt |= 01; + return; + } + close(fd); + if (pflag) + permissions(tgt, ssp); +} + +static void +specialcopy(const char *src, const struct stat *ssp, + const char *tgt, const struct stat *dsp) +{ + switch (ssp->st_mode & S_IFMT) { + case S_IFIFO: + case S_IFCHR: + case S_IFBLK: + case S_IFNAM: + case S_IFNWK: + devicecopy(src, ssp, tgt, dsp); + break; + case S_IFLNK: + symlinkcopy(src, ssp, tgt, dsp); + break; + case S_IFSOCK: + socketcopy(src, ssp, tgt, dsp); + break; + case S_IFDOOR: + ignoring("door", src); + break; + default: + fprintf(stderr, "%s: %s: unknown file type %o\n", + progname, src, (int)ssp->st_mode); + if (pers == PERS_MV) + errcnt |= 020; + } +} + +static void +getpath(const char *path, char **file, char **filend, size_t *sz, size_t *slen) +{ + *sz = 14 + strlen(path) + 2; + *file = smalloc(*sz); + *filend = *file; + if (path[0] == '/' && path[1] == '\0') + *(*filend)++ = '/'; + else { + register const char *cp = path; + while ((*(*filend)++ = *cp++) != '\0'); + (*filend)[-1] = '/'; + } + *slen = *filend - *file; +} + +static void +setpath(const char *base, char **file, char **filend, + size_t slen, size_t *sz, size_t *ss) +{ + if (slen + (*ss = strlen(base)) >= *sz) { + *sz += slen + *ss + 15; + *file = srealloc(*file, *sz); + *filend = &(*file)[slen]; + } + strcpy(*filend, base); +} + +static enum okay +trydelete(const char *path, int recursive) +{ + struct stat st; + enum okay val = OKAY; + + if (lstat(path, &st) < 0) { + fprintf(stderr, "%s: cannot stat %s for removal\n%s: %s\n", + progname, path, + progname, strerror(errno)); + errcnt |= 040; + val = STOP; + } else if ((st.st_mode & S_IFMT) == S_IFDIR) { + DIR *Dp; + 
+ if (recursive == 0) + goto do_rmdir; + if ((Dp = opendir(path)) != NULL) { + struct dirent *dp; + char *copy, *cend; + size_t sz, slen, ss; + + getpath(path, &copy, &cend, &sz, &slen); + while ((dp = readdir(Dp)) != NULL) { + if (dp->d_name[0] == '.' && + (dp->d_name[1] == '\0' || + (dp->d_name[1] == '.' && + dp->d_name[2] == '\0'))) + continue; + setpath(dp->d_name, &copy, &cend, + slen, &sz, &ss); + if ((val = trydelete(copy, recursive)) == STOP) + break; + } + free(copy); + closedir(Dp); + if (val != STOP) { +do_rmdir: + if (rmdir(path) < 0) { + fprintf(stderr, + "%s: cannot remove directory %s\n%s: %s\n", + progname, path, + progname, strerror(errno)); + val = STOP; + } + } + } else { + fprintf(stderr, + "%s: cannot open directory %s for removal\n%s: %s\n", + progname, path, + progname, strerror(errno)); + errcnt |= 040; + val = STOP; + } + } else { + if (unlink(path) < 0) { + fprintf(stderr, "%s: cannot unlink %s\n%s: %s\n", + progname, path, + progname, strerror(errno)); + errcnt |= 040; + val = STOP; + } + } + return val; +} + +static enum okay +tryrename(const char *src, const struct stat *ssp, + const char *tgt, const struct stat *dsp) +{ + if (dsp && !fflag) { + if (iflag) { + fprintf(stderr, "%s: overwrite %s? ", + progname, tgt); + if (confirm() != OKAY) + return STOP; + } else if (ontty && (dsp->st_mode&S_IFMT) != S_IFLNK && + access(tgt, W_OK) < 0) { + fprintf(stderr, "%s: %s: %o mode? 
", + progname, tgt, + (int)(dsp->st_mode & 0777)); + if (confirm() != OKAY) + return STOP; + } + } + if (rename(src, tgt) == 0) + return STOP; + if (errno != EXDEV) { + fprintf(stderr, "%s: cannot rename %s to %s\n%s: %s\n", + progname, src, tgt, + progname, strerror(errno)); + errcnt |= 01; + return STOP; + } + if (dsp) { + if ((dsp->st_mode & S_IFMT) == S_IFDIR && + (ssp->st_mode & S_IFMT) != S_IFDIR) { + fprintf(stderr, "%s: <%s> directory\n", + progname, tgt); + errcnt |= 01; + return STOP; + } + if ((dsp->st_mode & S_IFMT) != S_IFDIR && + (ssp->st_mode & S_IFMT) == S_IFDIR) { + fprintf(stderr, "%s: Target must be directory\n", + progname); + errcnt |= 01; + return STOP; + } + } + if (dsp == NULL || trydelete(tgt, 0) == OKAY) + return OKAY; + return STOP; +} + +static enum okay +commoncheck(const char *src, const char *tgt, const struct stat *dsp, + struct stat *ssp, + int (*statfn)(const char *, struct stat *)) +{ + if (statfn(src, ssp) < 0) { + if (pers == PERS_LN && sflag) + return OKAY; + fprintf(stderr, "%s: cannot access %s\n", progname, src); + errcnt |= 01; + return STOP; + } + if (dsp && (ssp->st_dev == dsp->st_dev && ssp->st_ino == dsp->st_ino)) { + fprintf(stderr, "%s: %s and %s are identical\n", + progname, src, tgt); + errcnt |= 01; + return STOP; + } + return OKAY; +} + +static void +cpmv(const char *src, const char *tgt, struct stat *dsp, int level, + int (*statfn)(const char *, struct stat *)) +{ + struct stat sst; + + if (commoncheck(src, tgt, dsp, &sst, + Rflag && level == 0 ? + pers == PERS_MV || HLPflag == 'P' ? 
+ lstat : stat : + statfn) != OKAY) + return; + if (pers == PERS_MV && level == 0) { + if (tryrename(src, &sst, tgt, dsp) == STOP) + return; + dsp = NULL; + } + if ((sst.st_mode & S_IFMT) == S_IFDIR) { + DIR *Dp; + struct dirent *dp; + char *scp, *send, *dcp, *dend; + size_t ssz, slen, sss, dsz, dlen, dss; + int destcreat = 0; + + if (rflag == 0) { + fprintf(stderr, "%s: <%s> directory\n", + progname, src); + errcnt |= 01; + return; + } + if (dsp && (dsp->st_mode & S_IFMT) != S_IFDIR) { + fprintf(stderr, "%s: %s: Not a directory.\n", + progname, tgt); + errcnt |= 01; + return; + } +#if !defined (SUS) + if (pers == PERS_CP && dsp != NULL && iflag) { + fprintf(stderr, "%s: overwrite %s? ", + progname, tgt); + if (confirm() != OKAY) + return; + } +#endif /* !SUS */ + if (dsp == NULL) { + if (mkdir(tgt, check_suid(&sst, + sst.st_mode&07777 | S_IRWXU)) < 0) { + fprintf(stderr, "%s: %s: %s\n", + progname, tgt, strerror(errno)); + errcnt |= 01; + return; + } + destcreat = 1; + } + if ((Dp = opendir(src)) == NULL) { + fprintf(stderr, "%s: %s: %s\n", + progname, src, + strerror(errno)); + errcnt |= 01; + return; + } + getpath(src, &scp, &send, &ssz, &slen); + getpath(tgt, &dcp, &dend, &dsz, &dlen); + while ((dp = readdir(Dp)) != NULL) { + struct stat xst; + if (dp->d_name[0] == '.' && + (dp->d_name[1] == '\0' || + (dp->d_name[1] == '.' && + dp->d_name[2] == '\0'))) + continue; + setpath(dp->d_name, &scp, &send, slen, &ssz, &sss); + setpath(dp->d_name, &dcp, &dend, dlen, &dsz, &dss); + go(scp, dcp, stat(dcp, &xst) < 0 ? NULL : &xst, + level + 1, statfn); + } + free(scp); + free(dcp); + if (destcreat) + permissions(tgt, &sst); + closedir(Dp); + } else { + if (dsp != NULL && iflag) { + fprintf(stderr, "%s: overwrite %s? 
", + progname, tgt); + if (confirm() != OKAY) + return; + } + if (dflag && sst.st_nlink > 1) { + if (canlink(tgt, &sst)) + return; + } + if ((sst.st_mode & S_IFMT) == S_IFREG || Rflag == 0) + filecopy(src, &sst, tgt, dsp); + else + specialcopy(src, &sst, tgt, dsp); + } + if (pers == PERS_MV && errcnt == 0 && level == 0) + trydelete(src, 1); + if ((pers == PERS_CP || pers == PERS_MV) && level == 0 && d0) + freedslots(); +} + +/*ARGSUSED3*/ +static void +ln(const char *src, const char *tgt, struct stat *dsp, int level, + int (*statfn)(const char *, struct stat *)) +{ + struct stat sst; + int (*how)(const char *, const char *) = sflag ? symlink : link; + + if (commoncheck(src, tgt, dsp, &sst, statfn) != OKAY) + return; + if ((sst.st_mode&S_IFMT) == S_IFDIR && !sflag) { + fprintf(stderr, "%s: <%s> directory\n", progname, src); + errcnt |= 01; + return; + } +#if (defined (SUS) || defined (S42)) && (defined (__linux__) || defined (__sun)) + if (sflag == 0) { + char *rpbuf = alloca(PATH_MAX+1); + if (realpath(src, rpbuf) == NULL) { + fprintf(stderr, "%s: cannot access %s\n", + progname, src); + errcnt |= 01; + return; + } + src = rpbuf; + } +#endif /* (SUS || S42) && (__linux__ || __sun) */ + if (dsp +#if !defined (SUS) + && !sflag +#endif /* !SUS */ + ) { + if (nflag && !fflag) { + fprintf(stderr, "%s: %s: File exists\n", + progname, tgt); + errcnt |= 01; + return; + } + if (!fflag && ontty && (dsp->st_mode&S_IFMT) != S_IFLNK && + access(tgt, W_OK) < 0) { + fprintf(stderr, "%s: %s: %o mode? 
", + progname, tgt, (int)(dsp->st_mode & 0777)); + if (confirm() != OKAY) + return; + } + if (unlink(tgt) < 0) { + fprintf(stderr, "%s: cannot unlink %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); + errcnt |= 01; + return; + } + } + if (how(src, tgt) < 0) { + if (sflag) + fprintf(stderr, "%s: cannot create %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); + else if (errno == EXDEV) + fprintf(stderr, "%s: different file system\n", + progname); + else + fprintf(stderr, "%s: errno: %d no permission for %s\n", + progname, errno, tgt); + errcnt |= 01; + } +} + +static const char * +getfl(void) +{ + const char *optstring; + + if (progname[0] == 'm' && progname[1] == 'v') { + pers = PERS_MV; + optstring = "b:fi"; + dflag = pflag = rflag = Rflag = 1; + go = cpmv; + } else if (progname[0] == 'l' && progname[1] == 'n') { + pers = PERS_LN; + optstring = "fns"; +#if defined (SUS) + nflag = 1; +#endif /* SUS */ + go = ln; + } else { + pers = PERS_CP; + optstring = "ab:dDfiHLpPrRs"; + go = cpmv; + } + return optstring; +} + +int +main(int argc, char **argv) +{ + struct stat dst, ust; + const char *optstring; + int (*statfn)(const char *, struct stat *); + int i, illegal = 0; + +#ifdef __GLIBC__ + putenv("POSIXLY_CORRECT=1"); +#endif + progname = basename(argv[0]); + optstring = getfl(); + while ((i = getopt(argc, argv, optstring)) != EOF) { + switch (i) { + case 'b': + bflag = atol(optarg); + break; +#ifdef O_DIRECT + case 'D': + Dflag = 1; + break; +#endif /* O_DIRECT */ + case 'd': + dflag = 1; + break; + case 'f': + fflag = 1; +#if defined (SUS) + if (pers == PERS_MV) + iflag = 0; +#endif /* SUS */ + break; + case 'i': + iflag = 1; +#if defined (SUS) + if (pers == PERS_MV) + fflag = 0; +#endif /* SUS */ + break; + case 'n': + nflag = 1; + break; + case 'p': + pflag = 1; + break; + case 'a': + dflag = pflag = 1; + /*FALLTHRU*/ + case 'R': + Rflag = 1; + /*FALLTHRU*/ + case 'r': + rflag = 1; + break; + case 's': + sflag = 1; + break; + case 'H': + 
case 'L': + case 'P': + HLPflag = i; + break; + default: + illegal = 1; + } + } + argv += optind, argc -= optind; + if (argc < 2) { + fprintf(stderr, "%s: Insufficient arguments (%d)\n", + progname, argc); + illegal = 1; + } + if (illegal) + usage(); + umask(umsk = umask(0)); + ontty = isatty(0); +#if defined (SUS) + /* nothing */ +#elif defined (S42) + if (pers == PERS_MV && !ontty) + iflag = 0; +#else /* !SUS, !S42 */ + if (!ontty) + iflag = 0; +#endif /* !SUS, !S42 */ + myuid = geteuid(); + mygid = getegid(); + inull = scalloc(1, sizeof *inull); + inull->i_lln = inull->i_rln = inull; + statfn = (Rflag && HLPflag != 'L' +#if !defined (SUS) && !defined (S42) + || pers == PERS_LN +#endif /* !SUS && !S42 */ + ? lstat : stat); + if (lstat(argv[argc-1], &dst) == 0) { + if ((dst.st_mode&S_IFMT) != S_IFLNK || + stat(argv[argc-1], &ust) < 0) + ust = dst; + if ((ust.st_mode&S_IFMT) == S_IFDIR) { + char *copy, *cend; + size_t sz, slen, ss; + unsigned saverrs = errcnt; + + getpath(argv[argc-1], &copy, &cend, &sz, &slen); + for (i = 0; i < argc-1; i++) { + errcnt = 0; + setpath(basename(argv[i]), &copy, &cend, + slen, &sz, &ss); + go(argv[i], copy, statfn(copy, &dst) < 0 ? + NULL : &dst, 0, statfn); + saverrs |= errcnt; + } + errcnt = saverrs; + } else if (argc > 2) { + fprintf(stderr, "%s: Target must be directory\n", + progname); + usage(); + } else + go(argv[0], argv[1], pers == PERS_CP ? &ust : &dst, + 0, statfn); + } else if (argc > 2) { + fprintf(stderr, "%s: %s not found\n", progname, argv[argc-1]); + errcnt |= 01; + } else + go(argv[0], argv[1], NULL, 0, statfn); + return errcnt; +} diff --git a/cp/ln.1 b/cp/ln.1 @@ -0,0 +1,113 @@ +.\" +.\" Sccsid @(#)ln.1 1.11 (gritter) 2/2/05 +.\" Parts taken from ln(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. 
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+.TH LN 1 "2/2/05" "Heirloom Toolchest" "User Commands" +.SH NAME +ln \- make a link +.SH SYNOPSIS +\fBln\fR [\fB\-f\fR] [\fB\-n\fR] [\fB\-s\fR] +\fIname1\fR [\fIname2\fR .\ .\ .\ ] \fItarget\fR +.SH DESCRIPTION +A link is a directory entry referring to a file; +the same file +(together with its size, all its protection information, etc.) +may have several links to it. +There is no way to distinguish a link to a file +from its original directory entry; +any changes in the file +are effective independently of the name +by which the file is known. +.PP +.B Ln +creates a link named +.I target +to an existing file +.IR name1 . +If +.I target +is a directory, +more than one name may be given, +and the links are placed in that directory, +with the name of the last pathname component. +.PP +It is forbidden to link to a directory +or to link across file systems. +It is, however, possible +to create a +.I symbolic +.I link +even in this case; +see the +.B \-s +option below. +.PP +The +.B ln +command accepts the following options: +.TP +.B \-f +If the target file exists, +an attempt is made to unlink it +before the new link is created, +regardless of file permissions. +This option is ignored with +.B /usr/5bin/ln +and +.B /usr/5bin/s42/ln +if the +.I \-s +option is also given. +.TP +.B \-n +Do not remove an existing target file +even if the user has write permission on it. +This is the default for +.B /usr/5bin/posix/ln +and +.BR /usr/5bin/posix2001/ln . +.TP +.B \-s +Create a symbolic link, +that is a special file containing the pathname of the target file. +The system will resolve this pathname +when the symbolic link is accessed. +A symbolic link can refer to all types of files +and span device boundaries, +but will become stale if the target file is removed. +An existing target file will not be overwritten. 
+.SH "SEE ALSO" +cp(1), +mv(1), +link(2), +symlink(2) diff --git a/cp/mkfile b/cp/mkfile @@ -0,0 +1,9 @@ +BIN = cp +OBJ = cp.o +LOCAL_CFLAGS = -DSUS +INSTALL_BIN = cp +INSTALL_MAN1 = cp.1 +DEPS = libcommon + +<$mkbuild/mk.default + diff --git a/cp/mv.1 b/cp/mv.1 @@ -0,0 +1,179 @@ +.\" +.\" Sccsid @(#)mv.1 1.15 (gritter) 1/24/05 +.\" Parts taken from cp(1) and mv(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH MV 1 "1/24/05" "Heirloom Toolchest" "User Commands" +.SH NAME +mv \- move or rename files and directories +.SH SYNOPSIS +\fBmv\fR [\fB\-f\fR] [\fB\-i\fR] [\fB\-b\ \fIsize\fR] +\fIfile1\fR [\fIfile2\fR .\ .\ .\ ] \fItarget\fR +.SH DESCRIPTION +.B Mv +moves (changes the name of) +.I file1 +to +.IR target . +If +.I target +is an existing regular file, +its content is overwritten. +Its mode and owner are preserved; +the mode of the source file is used otherwise. +If +the mode of +.I target +forbids writing +(and standard input is a terminal for +.B /usr/5bin/mv +and +.BR /usr/5bin/s42/mv ), +.B mv +prints the mode +(see +.IR chmod (2)) +and reads the standard input +to obtain a line; +if the line begins with y, +the move takes place; +if not, +the file is not moved. +.PP +If +.I target +is a directory, +one or more files are moved +into the directory with their original file-names. +.PP +.B Mv +refuses to move a file onto itself. +.PP +The +.B mv +command accepts the following options: +.TP +.B \-f +.B Mv +will not ask for confirmation +even if the modes of the +.I target +file do not permit writing. +Overrides the +.B \-i +option in +.B /usr/5bin/posix/mv +and +.BR /usr/5bin/posix2001/mv . +.TP +.B \-i +.B mv +will ask for confirmation +before overwriting an existing target. +For +.B /usr/5bin/mv +and +.BR /usr/5bin/s42/mv , +this flag will be automatically disabled +if standard input is not a terminal. 
+Overrides the +.B \-f +option in +.B /usr/5bin/posix/mv +and +.BR /usr/5bin/posix2001/mv . +.PP +The following option is an extension: +.TP +\fB\-b\ \fIsize\fR +When a regular file is moved to another file system, +its data must be copied. +This option overrides the automatically determined +i/o buffer size for such copies; +.I size +is given in bytes. +.SH "SEE ALSO" +cp(1), +cat(1), +pr(1), +unlink(2) +.SH NOTES +Use either +.RS +.sp +.B mv +.B \-\- +.I \-file +.I target +.sp +.RE +or +.RS +.sp +.B mv +.I ./\-file +.I target +.sp +.RE +to move files that begin with a hyphen character. +.PP +If source and target +lie on different file systems, +.B mv +must copy the file and delete the original. +In this case +any linking relationship with other files is lost, +but +.B mv +will preserve linkage inside the moved tree. +.B Mv +will try to preserve access and modification times, +user and group ownerships, +and file permission bits. +Failing to preserve these modes +is not considered an error, +only +.B /usr/5bin/posix/mv +and +.B /usr/5bin/posix2001/mv +will print an error message. +Special files +such as block and character devices, +FIFOs, +and symbolic links +are recreated in the target hierarchy. +.PP +The pathname contained in a symbolic link +is not changed when the link is moved. +Symbolic links with relative pathnames +may thus change or lose their target +if moved to a different level in the file hierarchy. diff --git a/dc/dc.1 b/dc/dc.1 @@ -0,0 +1,231 @@ +.\" +.\" Sccsid @(#)dc.1 1.5 (gritter) 1/11/03 +.\" Derived from dc(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. 
+.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH DC 1 "1/11/03" "Heirloom Toolchest" "User Commands" +.SH NAME +dc \- desk calculator +.SH SYNOPSIS +\fBdc\fR [\fIfile\fR] +.SH DESCRIPTION +.I Dc +is an arbitrary precision arithmetic package. +Ordinarily it operates on decimal integers, +but one may specify an input base, output base, +and a number of fractional digits to be maintained. +The overall structure of +.I dc +is +a stacking (reverse Polish) calculator. 
+If an argument is given, +input is taken from that file until its end, +then from the standard input. +The following constructions are recognized: +.HP 6 +number +.br +The value of the number is pushed on the stack. +A number is an unbroken string of the digits 0-9. +It may be preceded by an underscore _ to input a +negative number. +Numbers may contain decimal points. +.HP 6 ++ \- / * % ^ +.br +The +top two values on the stack are added +(+), +subtracted +(\-), +multiplied (*), +divided (/), +remaindered (%), +or exponentiated (^). +The two entries are popped off the stack; +the result is pushed on the stack in their place. +Any fractional part of an exponent is ignored. +.TP +.BI s x +The +top of the stack is popped and stored into +a register named +.I x, +where +.I x +may be any character. +If +the +.B s +is capitalized, +.I x +is treated as a stack and the value is pushed on it. +.TP +.BI l x +The +value in register +.I x +is pushed on the stack. +The register +.I x +is not altered. +All registers start with zero value. +If the +.B l +is capitalized, +register +.I x +is treated as a stack and its top value is popped onto the main stack. +.TP +.B d +The +top value on the stack is duplicated. +.TP +.B p +The top value on the stack is printed. +The top value remains unchanged. +.B P +interprets the top of the stack as an ascii string, +removes it, and prints it. +.TP +.B f +All values on the stack and in registers are printed. +.TP +.B q +exits the program. +If executing a string, the recursion level is +popped by two. +If +.B q +is capitalized, +the top value on the stack is popped and the string execution level is popped +by that value. +.TP +.B x +treats the top element of the stack as a character string +and executes it as a string of dc commands. +.TP +.B X +replaces the number on the top of the stack with its scale factor. +.TP +.B "[ ... ]" +puts the bracketed ascii string onto the top of the stack. 
+.HP 6 +.I "<x >x =x" +.br +The +top two elements of the stack are popped and compared. +Register +.I x +is executed if they obey the stated +relation. +.TP +.B v +replaces the top element on the stack by its square root. +Any existing fractional part of the argument is taken +into account, but otherwise the scale factor is ignored. +.TP +.B ! +interprets the rest of the line as a UNIX command. +.TP +.B c +All values on the stack are popped. +.TP +.B i +The top value on the stack is popped and used as the +number radix for further input. +.B I +pushes the input base on the top of the stack. +.TP +.B o +The top value on the stack is popped and used as the +number radix for further output. +.TP +.SM +.B O +pushes the output base on the top of the stack. +.TP +.B k +the top of the stack is popped, and that value is used as +a non-negative scale factor: +the appropriate number of places +are printed on output, +and maintained during multiplication, division, and exponentiation. +The interaction of scale factor, +input base, and output base will be reasonable if all are changed +together. +.TP +.B z +The stack level is pushed onto the stack. +.TP +.SM +.B Z +replaces the number on the top of the stack with its length. +.TP +.B ? +A line of input is taken from the input source (usually the terminal) +and executed. +.TP +.B "; :" +are used by +.I bc +for array operations. +.PP +An example which prints the first ten values of n! is +.nf +.PP +.in +3 +[la1+dsa*pla10>y]sy +.br +0sa1 +.br +lyx +.fi +.SH "SEE ALSO" +bc(1), +which is a preprocessor for +.I dc +providing infix notation and a C-like syntax +which implements functions and reasonable control +structures for programs. +.SH DIAGNOSTICS +`x is unimplemented' where x is an octal number. +.br +`stack empty' for not enough elements on the stack to do what was asked. +.br +`Out of space' when the free list is exhausted (too many digits). +.br +`Out of headers' for too many numbers being kept around. 
+.br +`Out of pushdown' for too many items on the stack. +.br +`Nesting Depth' for too many levels of nested execution. diff --git a/dc/dc.c b/dc/dc.c @@ -0,0 +1,2061 @@ +/* from 4.4BSD /usr/src/usr.bin/dc/dc.c */ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This module is believed to contain source code proprietary to AT&T. + * Use and redistribution is subject to the Berkeley Software License + * Agreement and your Software Agreement with AT&T (Western Electric). + * + * from dc.c 8.1 (Berkeley) 6/6/93" + */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* Sccsid @(#)dc.c 1.21 (gritter) 12/25/06> */ + +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <stdio.h> +#include <signal.h> +#include "sigset.h" +#include <stdlib.h> +#include <inttypes.h> +#include <limits.h> + +#include "dc.h" + +int +main(int argc,char **argv) +{ + init(argc,argv); + commnds(); + /*NOTREACHED*/ + return(0); +} + +void +commnds(void){ + register int c; + register struct blk *p,*q; + long l; + int sign; + struct blk **ptr,*s,*t; + struct sym *sp; + int sk,sk1,sk2; + int n,d; + + while(1){ + if(((c = readc())>='0' && c <= '9')|| (c>='A' && c <='F') || c == '.'){ + unreadc(c); + p = readin(); + pushp(p); + continue; + } + switch(c){ + case ' ': + case '\n': + case 0377: + case EOF: + continue; + case 'Y': + sdump("stk",*stkptr); + printf("all %ld rel %ld headmor %ld\n",all,rel,headmor); + printf("nbytes %ld\n",nbytes); + continue; + case '_': + p = readin(); + savk = sunputc(p); + chsign(p); + sputc(p,savk); + pushp(p); + continue; + case '-': + subt(); + continue; + case '+': + if(eqk() != 0)continue; + binop('+'); + continue; + case '*': + arg1 = pop(); + EMPTY; + arg2 = pop(); + EMPTYR(arg1); + sk1 = sunputc(arg1); + sk2 = sunputc(arg2); + binop('*'); + p = pop(); + sunputc(p); + savk = n = sk1+sk2; + if(n>k && n>sk1 && n>sk2){ + sk = sk1; + if(sk<sk2)sk = sk2; + if(sk<k)sk = k; + p = removc(p,n-sk); + savk = sk; + } + sputc(p,savk); + pushp(p); + continue; + case '/': +casediv: + if(dscale() != 0)continue; + binop('/'); + 
if(irem != 0)release(irem); + release(rem); + continue; + case '%': + if(dscale() != 0)continue; + binop('/'); + p = pop(); + release(p); + if(irem == 0){ + sputc(rem,skr+k); + pushp(rem); + continue; + } + p = add0(rem,skd-(skr+k)); + q = add(p,irem); + release(p); + release(irem); + sputc(q,skd); + pushp(q); + continue; + case 'v': + p = pop(); + EMPTY; + savk = sunputc(p); + if(length(p) == 0){ + sputc(p,savk); + pushp(p); + continue; + } + if((c = sbackc(p))<0){ + error("sqrt of neg number\n"); + } + if(k<savk)n = savk; + else{ + n = k*2-savk; + savk = k; + } + arg1 = add0(p,n); + arg2 = dcsqrt(arg1); + sputc(arg2,savk); + pushp(arg2); + continue; + case '^': + neg = 0; + arg1 = pop(); + EMPTY; + if(sunputc(arg1) != 0)error("exp not an integer\n"); + arg2 = pop(); + EMPTYR(arg1); + if(sfbeg(arg1) == 0 && sbackc(arg1)<0){ + neg++; + chsign(arg1); + } + if(length(arg1)>=3){ + error("exp too big\n"); + } + savk = sunputc(arg2); + p = dcexp(arg2,arg1); + release(arg2); + rewind(arg1); + c = sgetc(arg1); + if(sfeof(arg1) == 0) + c = sgetc(arg1)*100 + c; + d = c*savk; + release(arg1); + if(neg == 0){ + if(k>=savk)n = k; + else n = savk; + if(n<d){ + q = removc(p,d-n); + sputc(q,n); + pushp(q); + } + else { + sputc(p,d); + pushp(p); + } + } + else { + sputc(p,d); + pushp(p); + } + if(neg == 0)continue; + p = pop(); + q = salloc(2); + sputc(q,1); + sputc(q,0); + pushp(q); + pushp(p); + goto casediv; + case 'z': + p = salloc(2); + n = stkptr - stkbeg; + if(n >= 100){ + sputc(p,n/100); + n %= 100; + } + sputc(p,n); + sputc(p,0); + pushp(p); + continue; + case 'Z': + p = pop(); + EMPTY; + n = (length(p)-1)<<1; + fsfile(p); + sbackc(p); + if(sfbeg(p) == 0){ + if((c = sbackc(p))<0){ + n -= 2; + if(sfbeg(p) == 1)n += 1; + else { + if((c = sbackc(p)) == 0)n += 1; + else if(c > 90)n -= 1; + } + } + else if(c < 10) n -= 1; + } + release(p); + q = salloc(1); + if(n >= 100){ + sputc(q,n%100); + n /= 100; + } + sputc(q,n); + sputc(q,0); + pushp(q); + continue; + case 'i': + p = 
pop(); + EMPTY; + p = scalint(p); + release(inbas); + inbas = p; + continue; + case 'I': + p = copy(inbas,length(inbas)+1); + sputc(p,0); + pushp(p); + continue; + case 'o': + p = pop(); + EMPTY; + p = scalint(p); + sign = 0; + n = length(p); + q = copy(p,n); + fsfile(q); + l = c = sbackc(q); + if(n != 1){ + if(c<0){ + sign = 1; + chsign(q); + n = length(q); + fsfile(q); + l = c = sbackc(q); + } + if(n != 1){ + while(sfbeg(q) == 0)l = l*100+sbackc(q); + } + } + if (l > BC_BASE_MAX) + error("output base is too large\n"); + logo = log_2(l); + obase = l; + release(basptr); + if(sign == 1)obase = (long)-l; + basptr = p; + outdit = (int (*)(struct blk *, int, int))bigot; + if(n == 1 && sign == 0){ + if(c <= 16){ + outdit = (int (*)(struct blk *, int, int))hexot; + fw = 1; + fw1 = 0; + ll = 68; + release(q); + continue; + } + } + n = 0; + if(sign == 1)n++; + p = salloc(1); + sputc(p,-1); + t = add(p,q); + n += length(t)*2; + fsfile(t); + if((c = sbackc(t))>9)n++; + release(t); + release(q); + release(p); + fw = n; + fw1 = n-1; + ll = 68; + if(fw>=ll)continue; + ll = (68/fw)*fw; + continue; + case 'O': + p = copy(basptr,length(basptr)+1); + sputc(p,0); + pushp(p); + continue; + case '[': + n = 0; + p = salloc(0); + while(1){ + if((c = readc()) == ']'){ + if(n == 0)break; + n--; + } + sputc(p,c); + if(c == '[')n++; + } + pushp(p); + continue; + case 'k': + p = pop(); + EMPTY; + p = scalint(p); + if(length(p)>1){ + error("scale too big\n"); + } + rewind(p); + k = sfeof(p)?0:sgetc(p); + release(scalptr); + scalptr = p; + continue; + case 'K': + p = copy(scalptr,length(scalptr)+1); + sputc(p,0); + pushp(p); + continue; + case 'X': + p = pop(); + EMPTY; + fsfile(p); + n = sbackc(p); + release(p); + p = salloc(2); + sputc(p,n); + sputc(p,0); + pushp(p); + continue; + case 'Q': + p = pop(); + EMPTY; + if(length(p)>2){ + error("Q?\n"); + } + rewind(p); + if((c = sgetc(p))<0){ + error("neg Q\n"); + } + release(p); + while(c-- > 0){ + if(readptr == &readstk[0]){ + 
error("readstk?\n"); + } + if(*readptr != 0)release(*readptr); + readptr--; + } + continue; + case 'q': + if(readptr <= &readstk[1])exit(0); + if(*readptr != 0)release(*readptr); + readptr--; + if(*readptr != 0)release(*readptr); + readptr--; + continue; + case 'f': + if(stkptr == &stack[0])printf("empty stack\n"); + else { + for(ptr = stkptr; ptr > &stack[0];){ + print(*ptr--); + } + } + continue; + case 'p': + if(stkptr == &stack[0])printf("empty stack\n"); + else{ + print(*stkptr); + } + continue; + case 'P': + p = pop(); + EMPTY; + sputc(p,0); + printf("%s",p->beg); + release(p); + continue; + case 'd': + if(stkptr == &stack[0]){ + printf("empty stack\n"); + continue; + } + q = *stkptr; + n = length(q); + p = copy(*stkptr,n); + pushp(p); + continue; + case 'c': + while(stkerr == 0){ + p = pop(); + if(stkerr == 0)release(p); + } + continue; + case 'S': + if(stkptr == &stack[0]){ + error("save: args\n"); + } + c = readc() & 0377; + sptr = stable[c]; + sp = stable[c] = sfree; + sfree = sfree->next; + if(sfree == 0)goto sempty; + sp->next = sptr; + p = pop(); + EMPTY; + if(c >= ARRAYST){ + q = copy(p,length(p)); + for(n = 0;n < PTRSZ;n++)sputc(q,0); + release(p); + p = q; + } + sp->val = p; + continue; +sempty: + error("symbol table overflow\n"); + case 's': + if(stkptr == &stack[0]){ + error("save:args\n"); + } + c = readc() & 0377; + sptr = stable[c]; + if(sptr != 0){ + p = sptr->val; + if(c >= ARRAYST){ + rewind(p); + while(sfeof(p) == 0)release(dcgetwd(p)); + } + release(p); + } + else{ + sptr = stable[c] = sfree; + sfree = sfree->next; + if(sfree == 0)goto sempty; + sptr->next = 0; + } + p = pop(); + sptr->val = p; + continue; + case 'l': + load(); + continue; + case 'L': + c = readc() & 0377; + sptr = stable[c]; + if(sptr == 0){ + error("L?\n"); + } + stable[c] = sptr->next; + sptr->next = sfree; + sfree = sptr; + p = sptr->val; + if(c >= ARRAYST){ + rewind(p); + while(sfeof(p) == 0){ + q = dcgetwd(p); + if(q != 0)release(q); + } + } + pushp(p); + continue; + 
case ':': + p = pop(); + EMPTY; + q = scalint(p); + fsfile(q); + c = 0; + if((sfbeg(q) == 0) && ((c = sbackc(q))<0)){ + error("neg index\n"); + } + if(length(q)>2){ + error("index too big\n"); + } + if(sfbeg(q) == 0)c = c*100+sbackc(q); + if(c >= BC_DIM_MAX){ + error("index too big\n"); + } + release(q); + n = readc() & 0377; + sptr = stable[n]; + if(sptr == 0){ + sptr = stable[n] = sfree; + sfree = sfree->next; + if(sfree == 0)goto sempty; + sptr->next = 0; + p = salloc((c+PTRSZ)*PTRSZ); + zero(p); + } + else{ + p = sptr->val; + if(length(p)-PTRSZ < c*PTRSZ){ + q = copy(p,(c+PTRSZ)*PTRSZ); + release(p); + p = q; + } + } + seekc(p,c*PTRSZ); + q = lookwd(p); + if (q!=NULL) release(q); + s = pop(); + EMPTY; + salterwd((struct wblk *)p,s); + sptr->val = p; + continue; + case ';': + p = pop(); + EMPTY; + q = scalint(p); + fsfile(q); + c = 0; + if((sfbeg(q) == 0) && ((c = sbackc(q))<0)){ + error("neg index\n"); + } + if(length(q)>2){ + error("index too big\n"); + } + if(sfbeg(q) == 0)c = c*100+sbackc(q); + if(c >= BC_DIM_MAX){ + error("index too big\n"); + } + release(q); + n = readc() & 0377; + sptr = stable[n]; + if(sptr != 0){ + p = sptr->val; + if(length(p)-PTRSZ >= c*PTRSZ){ + seekc(p,c*PTRSZ); + s = dcgetwd(p); + if(s != 0){ + q = copy(s,length(s)); + pushp(q); + continue; + } + } + } + q = salloc(1); + sputc(q, 0); + pushp(q); + continue; + case 'x': +execute: + p = pop(); + EMPTY; + if((readptr != &readstk[0]) && (*readptr != 0)){ + if((*readptr)->rd == (*readptr)->wt) + release(*readptr); + else{ + if(readptr++ == &readstk[RDSKSZ]){ + error("nesting depth\n"); + } + } + } + else readptr++; + *readptr = p; + if(p != 0)rewind(p); + else{ + if((c = readc()) != '\n')unreadc(c); + } + continue; + case '?': + if(++readptr == &readstk[RDSKSZ]){ + error("nesting depth\n"); + } + *readptr = 0; + fsave = curfile; + curfile = stdin; + while((c = readc()) == '!')command(); + p = salloc(0); + sputc(p,c); + while((c = readc()) != '\n'){ + sputc(p,c); + if(c == 
'\\')sputc(p,readc()); + } + curfile = fsave; + *readptr = p; + continue; + case '!': + if(command() == 1)goto execute; + continue; + case '<': + case '>': + case '=': + if(cond(c) == 1)goto execute; + continue; + default: + printf("%o is unimplemented\n",c); + } + } +} + +struct blk * +div(struct blk *ddivd,struct blk *ddivr) +{ + int divsign,remsign,offset,divcarry = 0; + int carry, dig = 0,magic,d = 0,dd; + long c,td,cc; + struct blk *ps; + register struct blk *p,*divd,*divr; + + rem = 0; + p = salloc(0); + if(length(ddivr) == 0){ + pushp(ddivr); + printf("divide by 0\n"); + return NULL; + } + divsign = remsign = 0; + divr = ddivr; + fsfile(divr); + if(sbackc(divr) == -1){ + divr = copy(ddivr,length(ddivr)); + chsign(divr); + divsign = ~divsign; + } + divd = copy(ddivd,length(ddivd)); + fsfile(divd); + if(sfbeg(divd) == 0 && sbackc(divd) == -1){ + chsign(divd); + divsign = ~divsign; + remsign = ~remsign; + } + offset = length(divd) - length(divr); + if(offset < 0)goto ddone; + seekc(p,offset+1); + sputc(divd,0); + magic = 0; + fsfile(divr); + c = sbackc(divr); + if(c<10)magic++; + c = c*100 + (sfbeg(divr)?0:sbackc(divr)); + if(magic>0){ + c = (c*100 +(sfbeg(divr)?0:sbackc(divr)))*2; + c /= 25; + } + while(offset >= 0){ + fsfile(divd); + td = sbackc(divd)*100; + dd = sfbeg(divd)?0:sbackc(divd); + td = (td+dd)*100; + dd = sfbeg(divd)?0:sbackc(divd); + td = td+dd; + cc = c; + if(offset == 0)td += 1; + else cc += 1; + if(magic != 0)td = td<<3; + dig = td/cc; + rewind(divr); + rewind(divxyz); + carry = 0; + while(sfeof(divr) == 0){ + d = sgetc(divr)*dig+carry; + carry = d / 100; + salterc(divxyz,d%100); + } + salterc(divxyz,carry); + rewind(divxyz); + seekc(divd,offset); + carry = 0; + while(sfeof(divd) == 0){ + d = slookc(divd); + d = d-(sfeof(divxyz)?0:sgetc(divxyz))-carry; + carry = 0; + if(d < 0){ + d += 100; + carry = 1; + } + salterc(divd,d); + } + divcarry = carry; + sbackc(p); + salterc(p,dig); + sbackc(p); + if(--offset >= 0){ + if(d > 0){ + sbackc(divd); + 
dd=sbackc(divd); + salterc(divd,dd+100); + } + divd->wt--; + } + } + if(divcarry != 0){ + salterc(p,dig-1); + salterc(divd,-1); + ps = add(divr,divd); + release(divd); + divd = ps; + } + + rewind(p); + divcarry = 0; + while(sfeof(p) == 0){ + d = slookc(p)+divcarry; + divcarry = 0; + if(d >= 100){ + d -= 100; + divcarry = 1; + } + salterc(p,d); + } + if(divcarry != 0)salterc(p,divcarry); + fsfile(p); + while(sfbeg(p) == 0){ + if(sbackc(p) == 0)truncate(p); + else break; + } + if(divsign < 0)chsign(p); + fsfile(divd); + while(sfbeg(divd) == 0){ + if(sbackc(divd) == 0)truncate(divd); + else break; + } +ddone: + if(remsign<0)chsign(divd); + if(divr != ddivr)release(divr); + rem = divd; + return(p); +} + +int +dscale(void){ + register struct blk *dd,*dr; + register struct blk *r; + int c; + + dr = pop(); + EMPTYS; + dd = pop(); + EMPTYSR(dr); + fsfile(dd); + skd = sunputc(dd); + fsfile(dr); + skr = sunputc(dr); + if(sfbeg(dr) == 1 || (sfbeg(dr) == 0 && sbackc(dr) == 0)){ + sputc(dr,skr); + pushp(dr); + errorrt("divide by 0\n"); + } + c = k-skd+skr; + if(c < 0)r = removr(dd,-c); + else { + r = add0(dd,c); + irem = 0; + } + arg1 = r; + arg2 = dr; + savk = k; + return(0); +} + +struct blk * +removr(struct blk *p,int n) +{ + int nn; + register struct blk *q,*s,*r; + + rewind(p); + nn = (n+1)/2; + q = salloc(nn); + while(n>1){ + sputc(q,sgetc(p)); + n -= 2; + } + r = salloc(2); + while(sfeof(p) == 0)sputc(r,sgetc(p)); + release(p); + if(n == 1){ + s = dcdiv(r,tenptr); + release(r); + rewind(rem); + if(sfeof(rem) == 0)sputc(q,sgetc(rem)); + release(rem); + irem = q; + return(s); + } + irem = q; + return(r); +} + +struct blk * +sqrt(struct blk *p) +{ + struct blk *t; + struct blk *r,*q,*s; + int c,n,nn; + + n = length(p); + fsfile(p); + c = sbackc(p); + if((n&1) != 1)c = c*100+(sfbeg(p)?0:sbackc(p)); + n = (n+1)>>1; + r = salloc(n); + zero(r); + seekc(r,n); + nn=1; + while((c -= nn)>=0)nn+=2; + c=(nn+1)>>1; + fsfile(r); + sbackc(r); + if(c>=100){ + c -= 100; + salterc(r,c); + 
sputc(r,1); + } + else salterc(r,c); + while(1){ + q = dcdiv(p,r); + s = add(q,r); + release(q); + release(rem); + q = dcdiv(s,sqtemp); + release(s); + release(rem); + s = copy(r,length(r)); + chsign(s); + t = add(s,q); + release(s); + fsfile(t); + nn = sfbeg(t)?0:sbackc(t); + if(nn>=0)break; + release(r); + release(t); + r = q; + } + release(t); + release(q); + release(p); + return(r); +} + +struct blk * +exp(struct blk *base,struct blk *ex) +{ + register struct blk *r,*e,*p; + struct blk *e1,*t,*cp; + int temp,c,n; + r = salloc(1); + sputc(r,1); + p = copy(base,length(base)); + e = copy(ex,length(ex)); + fsfile(e); + if(sfbeg(e) != 0)goto edone; + temp=0; + c = sbackc(e); + if(c<0){ + temp++; + chsign(e); + } + while(length(e) != 0){ + e1=dcdiv(e,sqtemp); + release(e); + e = e1; + n = length(rem); + release(rem); + if(n != 0){ + e1=mult(p,r); + release(r); + r = e1; + } + t = copy(p,length(p)); + cp = mult(p,t); + release(p); + release(t); + p = cp; + } + if(temp != 0){ + if((c = length(base)) == 0){ + goto edone; + } + if(c>1)create(r); + else{ + rewind(base); + if((c = sgetc(base))<=1){ + create(r); + sputc(r,c); + } + else create(r); + } + } +edone: + release(p); + release(e); + return(r); +} + +void +init(int argc,char **argv) +{ + register struct sym *sp; + + if (sigset(SIGINT, SIG_IGN) != SIG_IGN) + sigset(SIGINT,onintr); + setbuf(stdout,(char *)NULL); + svargc = --argc; + svargv = argv; + while(svargc>0 && svargv[1][0] == '-'){ + switch(svargv[1][1]){ + default: + dbg=1; + } + svargc--; + svargv++; + } + ifile=1; + if(svargc<=0)curfile = stdin; + else if((curfile = fopen(svargv[1],"r")) == NULL){ + printf("can't open file %s\n",svargv[1]); + exit(1); + } + scalptr = salloc(1); + sputc(scalptr,0); + basptr = salloc(1); + sputc(basptr,10); + obase=10; + log_10=log_2(10L); + ll=68; + fw=1; + fw1=0; + tenptr = salloc(1); + sputc(tenptr,10); + obase=10; + inbas = salloc(1); + sputc(inbas,10); + sqtemp = salloc(1); + sputc(sqtemp,2); + chptr = salloc(0); + 
strptr = salloc(0); + divxyz = salloc(0); + stkbeg = stkptr = &stack[0]; + stkend = &stack[STKSZ]; + stkerr = 0; + readptr = &readstk[0]; + k=0; + sp = sptr = &symlst[0]; + /* chain symlst[] into a list through next; stop on the last element so that the terminating store below stays inside the array */ + while(sptr < &symlst[TBLSZ-1]){ + sptr->next = ++sp; + sptr++; + } + sptr->next=0; + sfree = &symlst[0]; + return; +} + +void +onintr(int signum){ + + sigset(SIGINT,onintr); + while(readptr != &readstk[0]){ + if(*readptr != 0){release(*readptr);} + readptr--; + } + curfile = stdin; + commnds(); +} + +/* + * Push p on the operand stack. When the stack is full a message is + * printed and p is not stored. + */ +void +pushp(struct blk *p) +{ + /* stkend is one past the end of stack[] and the store below pre-increments, so there is no room left once stkptr has reached stkend-1 */ + if(stkptr >= stkend-1){ + printf("out of stack space\n"); + return; + } + stkerr=0; + *++stkptr = p; + return; +} + +/* pop the top entry; on an empty stack set stkerr and return 0 */ +struct blk * +pop(void){ + if(stkptr == stack){ + stkerr=1; + return(0); + } + return(*stkptr--); +} + +/* + * Read a number from the input: digits 0-9 and A-F are accumulated in + * base inbas, with at most one '.'. The character that ends the number + * is pushed back with unreadc(). + */ +struct blk * +readin(void){ + register struct blk *p,*q; + int dp,dpct; + register int c; + + dp = dpct=0; + p = salloc(0); + while(1){ + c = readc(); + switch(c){ + case '.': + if(dp != 0){ + /* a second '.' ends the number; the old unreadc()+break only left the switch, so the loop read the same '.' again without end */ + goto gotnum; + } + dp++; + continue; + case '\\': + readc(); + continue; + default: + if(c >= 'A' && c <= 'F')c = c - 'A' + 10; + else if(c >= '0' && c <= '9')c -= '0'; + else goto gotnum; + if(dp != 0){ + if(dpct >= 99)continue; + dpct++; + } + create(chptr); + if(c != 0)sputc(chptr,c); + q = mult(p,inbas); + release(p); + p = add(chptr,q); + release(q); + } + } +gotnum: + unreadc(c); + if(dp == 0){ + sputc(p,0); + return(p); + } + else{ + q = scale(p,dpct); + return(q); + } +} + +struct blk * +add0(struct blk *p,int ct) +{ + /* returns pointer to struct with ct 0's & p */ + register struct blk *q,*t; + + q = salloc(length(p)+(ct+1)/2); + while(ct>1){ + sputc(q,0); + ct -= 2; + } + rewind(p); + while(sfeof(p) == 0){ + sputc(q,sgetc(p)); + } + release(p); + if(ct == 1){ + t = mult(tenptr,q); + release(q); + return(t); + } + return(q); +} + +struct blk * +mult(struct blk *p,struct blk *q) +{ + register struct blk *mp,*mq,*mr; + int sign,offset,carry; + int cq,cp,mt,mcr; + + offset = sign = 0; + fsfile(p); + mp = p; + if(sfbeg(p) == 0){ + if(sbackc(p)<0){ + mp 
= copy(p,length(p)); + chsign(mp); + sign = ~sign; + } + } + fsfile(q); + mq = q; + if(sfbeg(q) == 0){ + if(sbackc(q)<0){ + mq = copy(q,length(q)); + chsign(mq); + sign = ~sign; + } + } + mr = salloc(length(mp)+length(mq)); + zero(mr); + rewind(mq); + while(sfeof(mq) == 0){ + cq = sgetc(mq); + rewind(mp); + rewind(mr); + mr->rd += offset; + carry=0; + while(sfeof(mp) == 0){ + cp = sgetc(mp); + mcr = sfeof(mr)?0:slookc(mr); + mt = cp*cq + carry + mcr; + carry = mt/100; + salterc(mr,mt%100); + } + offset++; + if(carry != 0){ + mcr = sfeof(mr)?0:slookc(mr); + salterc(mr,mcr+carry); + } + } + if(sign < 0){ + chsign(mr); + } + if(mp != p)release(mp); + if(mq != q)release(mq); + return(mr); +} + +void +chsign(struct blk *p) +{ + register int carry; + register char ct; + + carry=0; + rewind(p); + while(sfeof(p) == 0){ + ct=100-slookc(p)-carry; + carry=1; + if(ct>=100){ + ct -= 100; + carry=0; + } + salterc(p,ct); + } + if(carry != 0){ + sputc(p,-1); + fsfile(p); + sbackc(p); + ct = sbackc(p); + if(ct == 99){ + truncate(p); + sputc(p,-1); + } + } + else{ + fsfile(p); + ct = sbackc(p); + if(ct == 0)truncate(p); + } + return; +} + +int +readc(void){ +loop: + if((readptr != &readstk[0]) && (*readptr != 0)){ + if(sfeof(*readptr) == 0)return(lastchar = sgetc(*readptr)); + release(*readptr); + readptr--; + goto loop; + } + lastchar = getc(curfile); + if(lastchar != EOF)return(lastchar); + if(readptr != &readptr[0]){ + readptr--; + if(*readptr == 0)curfile = stdin; + goto loop; + } + if(curfile != stdin){ + fclose(curfile); + curfile = stdin; + goto loop; + } + exit(0); +} + +void +unreadc(char c) +{ + + if((readptr != &readstk[0]) && (*readptr != 0)){ + sungetc(*readptr,c); + } + else ungetc(c,curfile); + return; +} + +void +binop(char c) +{ + register struct blk *r = NULL; + + switch(c){ + case '+': + r = add(arg1,arg2); + break; + case '*': + r = mult(arg1,arg2); + break; + case '/': + r = dcdiv(arg1,arg2); + break; + } + release(arg1); + release(arg2); + sputc(r,savk); + 
pushp(r); + return; +} + +void +print(struct blk *hptr) +{ + int sc; + register struct blk *p,*q,*dec; + int dig,dout,ct; + + rewind(hptr); + while(sfeof(hptr) == 0){ + if(sgetc(hptr)>99){ + rewind(hptr); + while(sfeof(hptr) == 0){ + printf("%c",sgetc(hptr)); + } + printf("\n"); + return; + } + } + fsfile(hptr); + sc = sbackc(hptr); + if(sfbeg(hptr) != 0){ + printf("0\n"); + return; + } + count = ll; + p = copy(hptr,length(hptr)); + sunputc(p); + fsfile(p); + if(sbackc(p)<0){ + chsign(p); + OUTC('-'); + } + if((obase == 0) || (obase == -1)){ + oneot(p,sc,'d'); + return; + } + if(obase == 1){ + oneot(p,sc,'1'); + return; + } + if(obase == 10){ + tenot(p,sc); + return; + } + create(strptr); + dig = log_10*sc; + dout = ((dig/10) + dig) /logo; + dec = getdec(p,sc); + p = removc(p,sc); + while(length(p) != 0){ + q = dcdiv(p,basptr); + release(p); + p = q; + (*outdit)(rem,0,1); + } + release(p); + fsfile(strptr); + while(sfbeg(strptr) == 0)OUTC(sbackc(strptr)); + if(sc == 0){ + release(dec); + printf("\n"); + return; + } + create(strptr); + OUTC('.'); + ct=0; + do{ + q = mult(basptr,dec); + release(dec); + dec = getdec(q,sc); + p = removc(q,sc); + (*outdit)(p,1,ct+1<dout); + }while(++ct < dout); + release(dec); + rewind(strptr); + while(sfeof(strptr) == 0)OUTC(sgetc(strptr)); + printf("\n"); + return; +} + +struct blk * +getdec(struct blk *p,int sc) +{ + int cc; + register struct blk *q,*t,*s; + + rewind(p); + if(length(p)*2 < sc){ + q = copy(p,length(p)); + return(q); + } + q = salloc(length(p)); + while(sc >= 1){ + sputc(q,sgetc(p)); + sc -= 2; + } + if(sc != 0){ + t = mult(q,tenptr); + s = salloc(cc = length(q)); + release(q); + rewind(t); + while(cc-- > 0)sputc(s,sgetc(t)); + sputc(s,0); + release(t); + t = dcdiv(s,tenptr); + release(s); + release(rem); + return(t); + } + return(q); +} + +void +tenot(struct blk *p,int sc) +{ + register int c,f; + char b[3]; + + fsfile(p); + f=0; + while((sfbeg(p) == 0) && ((p->rd-p->beg-1)*2 >= sc)){ + c = sbackc(p); + if((c<10) && 
(f == 1))snprintf(b, sizeof b, "0%d",c); + else snprintf(b, sizeof b, "%d",c); + f=1; + TEST2(b); + } + if(sc == 0){ + printf("\n"); + release(p); + return; + } + if((p->rd-p->beg)*2 > sc){ + c = sbackc(p); + snprintf(b, sizeof b, "%d.",c/10); + TEST2(b); + OUTC(c%10 +'0'); + sc--; + } + else { + OUTC('.'); + } + if(sc > (p->rd-p->beg)*2){ + while(sc>(p->rd-p->beg)*2){ + OUTC('0'); + sc--; + } + } + while(sc > 1){ + c = sbackc(p); + if(c<10)snprintf(b, sizeof b, "0%d",c); + else snprintf(b, sizeof b, "%d",c); + sc -= 2; + TEST2(b); + } + if(sc == 1){ + OUTC(sbackc(p)/10 +'0'); + } + printf("\n"); + release(p); + return; +} + +void +oneot(struct blk *p,int sc,char ch) +{ + register struct blk *q; + + q = removc(p,sc); + create(strptr); + sputc(strptr,-1); + while(length(q)>0){ + p = add(strptr,q); + release(q); + q = p; + OUTC(ch); + } + release(q); + printf("\n"); + return; +} + +void +hexot(struct blk *p,int flg,int unused) +{ + register int c; + rewind(p); + if(sfeof(p) != 0){ + sputc(strptr,'0'); + release(p); + return; + } + c = sgetc(p); + release(p); + if(c >= 16){ + printf("hex digit > 16"); + return; + } + sputc(strptr,c<10?c+'0':c-10+'A'); + return; +} + +void +bigot(struct blk *p,int flg,int putspc) +{ + register struct blk *t,*q; + register int l = 0; + int neg; + + if(flg == 1)t = salloc(0); + else{ + t = strptr; + l = length(strptr)+fw-1; + } + neg=0; + if(length(p) != 0){ + fsfile(p); + if(sbackc(p)<0){ + neg=1; + chsign(p); + } + while(length(p) != 0){ + q = dcdiv(p,tenptr); + release(p); + p = q; + rewind(rem); + sputc(t,sfeof(rem)?'0':sgetc(rem)+'0'); + release(rem); + } + } + release(p); + if(flg == 1){ + l = fw1-length(t); + if(neg != 0){ + l--; + sputc(strptr,'-'); + } + fsfile(t); + while(l-- > 0)sputc(strptr,'0'); + while(sfbeg(t) == 0)sputc(strptr,sbackc(t)); + release(t); + } + else{ + l -= length(strptr); + while(l-- > 0)sputc(strptr,'0'); + if(neg != 0){ + sunputc(strptr); + sputc(strptr,'-'); + } + } + if (putspc) + sputc(strptr,' '); + 
return; +} + +struct blk * +add(struct blk *a1,struct blk *a2) +{ + register struct blk *p; + register int carry,n; + int size; + int c = 0,n1,n2; + + size = length(a1)>length(a2)?length(a1):length(a2); + p = salloc(size); + rewind(a1); + rewind(a2); + carry=0; + while(--size >= 0){ + n1 = sfeof(a1)?0:sgetc(a1); + n2 = sfeof(a2)?0:sgetc(a2); + n = n1 + n2 + carry; + if(n>=100){ + carry=1; + n -= 100; + } + else if(n<0){ + carry = -1; + n += 100; + } + else carry = 0; + sputc(p,n); + } + if(carry != 0)sputc(p,carry); + fsfile(p); + if(sfbeg(p) == 0){ + while(sfbeg(p) == 0 && (c = sbackc(p)) == 0); + if(c != 0)salterc(p,c); + truncate(p); + } + fsfile(p); + if(sfbeg(p) == 0 && sbackc(p) == -1){ + while((c = sbackc(p)) == 99){ + if(c == EOF)break; + } + sgetc(p); + salterc(p,-1); + truncate(p); + } + return(p); +} + +int +eqk(void){ + register struct blk *p,*q; + register int skp; + int skq; + + p = pop(); + EMPTYS; + q = pop(); + EMPTYSR(p); + skp = sunputc(p); + skq = sunputc(q); + if(skp == skq){ + arg1=p; + arg2=q; + savk = skp; + return(0); + } + else if(skp < skq){ + savk = skq; + p = add0(p,skq-skp); + } + else { + savk = skp; + q = add0(q,skp-skq); + } + arg1=p; + arg2=q; + return(0); +} + +struct blk * +removc(struct blk *p,int n) +{ + register struct blk *q,*r; + + rewind(p); + while(n>1){ + sgetc(p); + n -= 2; + } + q = salloc(2); + while(sfeof(p) == 0)sputc(q,sgetc(p)); + if(n == 1){ + r = dcdiv(q,tenptr); + release(q); + release(rem); + q = r; + } + release(p); + return(q); +} + +struct blk * +scalint(struct blk *p) +{ + register int n; + n = sunputc(p); + p = removc(p,n); + return(p); +} + +struct blk * +scale(struct blk *p,int n) +{ + register struct blk *q,*s,*t; + + t = add0(p,n); + q = salloc(1); + sputc(q,n); + s = dcexp(inbas,q); + release(q); + q = dcdiv(t,s); + release(t); + release(s); + release(rem); + sputc(q,n); + return(q); +} + +int +subt(void){ + arg1=pop(); + EMPTYS; + savk = sunputc(arg1); + chsign(arg1); + sputc(arg1,savk); + 
pushp(arg1); + if(eqk() != 0)return(1); + binop('+'); + return(0); +} + +int +command(void){ + int c; + static char *line; + static int linesize; + char *sl; + register void (*savint)(int); + register int pid,rpid; + int retcode; + + switch(c = readc()){ + case '<': + return(cond(NL)); + case '>': + return(cond(NG)); + case '=': + return(cond(NE)); + default: + if (line == 0) + line = srealloc(0, linesize = 10); + sl = line; + *sl++ = c; + while((c = readc()) != '\n') { + if (sl >= &line[linesize-2]) { + int diff = sl - line; + line = srealloc(line, linesize += 10); + sl = &line[diff]; + } + *sl++ = c; + } + *sl = 0; + if((pid = fork()) == 0){ + execl(SHELL,"sh","-c",line,NULL); + exit(0100); + } + savint = sigset(SIGINT, SIG_IGN); + while((rpid = wait(&retcode)) != pid && rpid != -1); + sigset(SIGINT,savint); + printf("!\n"); + return(0); + } +} + +int +cond(char c) +{ + register struct blk *p; + register int cc; + + if(subt() != 0)return(1); + p = pop(); + sunputc(p); + if(length(p) == 0){ + release(p); + if(c == '<' || c == '>' || c == NE){ + readc(); + return(0); + } + load(); + return(1); + } + else { + if(c == '='){ + release(p); + readc(); + return(0); + } + } + if(c == NE){ + release(p); + load(); + return(1); + } + fsfile(p); + cc = sbackc(p); + release(p); + if((cc<0 && (c == '<' || c == NG)) || + (cc >0) && (c == '>' || c == NL)){ + readc(); + return(0); + } + load(); + return(1); +} + +void +load(void){ + register int c; + register struct blk *p,*q; + struct blk *t,*s; + c = readc() & 0377; + sptr = stable[c]; + if(sptr != 0){ + p = sptr->val; + if(c >= ARRAYST){ + q = salloc(length(p)); + rewind(p); + while(sfeof(p) == 0){ + s = dcgetwd(p); + if(s == 0){putwd(q, (struct blk *)NULL);} + else{ + t = copy(s,length(s)); + putwd(q,t); + } + } + pushp(q); + } + else{ + q = copy(p,length(p)); + pushp(q); + } + } + else{ + q = salloc(1); + sputc(q,0); + pushp(q); + } + return; +} + +int +log_2(long n) +{ + register int i; + + if(n == 0)return(0); + i=31; + 
if(n<0)return(i); + while((n= n<<1) >0)i--; + return(--i); +} + +struct blk * +salloc(int size) +{ + register struct blk *hdr; + register char *ptr; + all++; + nbytes += size; + ptr = malloc((unsigned)(size?size:1)); + if(ptr == 0){ + garbage("salloc"); + if((ptr = malloc((unsigned)(size?size:1))) == 0) + ospace("salloc"); + } + if((hdr = hfree) == 0)hdr = morehd(); + hfree = (struct blk *)hdr->rd; + hdr->rd = hdr->wt = hdr->beg = ptr; + hdr->last = ptr+size; + return(hdr); +} + +struct blk * +morehd(void){ + register struct blk *h,*kk; + headmor++; + nbytes += HEADSZ; + hfree = h = (struct blk *)malloc(HEADSZ); + if(hfree == 0){ + garbage("morehd"); + if((hfree = h = (struct blk *)malloc(HEADSZ)) == 0) + ospace("headers"); + } + kk = h; + while(h<hfree+(HEADSZ/BLK))(h++)->rd = (char *)++kk; + (--h)->rd=0; + return(hfree); +} + +/* +sunputc(struct blk *hptr) +{ + hptr->wt--; + hptr->rd = hptr->wt; + return(*hptr->wt); +} +*/ + +struct blk * +copy(struct blk *hptr,int size) +{ + register struct blk *hdr; + register unsigned sz; + register char *ptr; + + all++; + nbytes += size; + sz = length(hptr); + ptr = nalloc(hptr->beg, (unsigned)size); + if(ptr == 0){ + garbage("copy"); + if((ptr = nalloc(hptr->beg, (unsigned)size)) == NULL){ + printf("copy size %d\n",size); + ospace("copy"); + } + } + if((hdr = hfree) == 0)hdr = morehd(); + hfree = (struct blk *)hdr->rd; + hdr->rd = hdr->beg = ptr; + hdr->last = ptr+size; + hdr->wt = ptr+sz; + ptr = hdr->wt; + while(ptr<hdr->last)*ptr++ = '\0'; + return(hdr); +} + +void +sdump(char *s1,struct blk *hptr) +{ + char *p; + printf("%s %lo rd %lo wt %lo beg %lo last %lo\n", s1, + (long)(intptr_t)hptr, + (long)(intptr_t)hptr->rd, + (long)(intptr_t)hptr->wt, + (long)(intptr_t)hptr->beg, + (long)(intptr_t)hptr->last); + p = hptr->beg; + while(p < hptr->wt)printf("%d ",*p++); + printf("\n"); +} + +void +seekc(struct blk *hptr,int n) +{ + register char *nn,*p; + + nn = hptr->beg+n; + if(nn > hptr->last){ + nbytes += nn - hptr->last; + 
/*free(hptr->beg);*/ + p = realloc(hptr->beg, (unsigned)n); + if(p == 0){ + hptr->beg = realloc(hptr->beg, (unsigned)(hptr->last-hptr->beg)); + garbage("seekc"); + if((p = realloc(hptr->beg, (unsigned)n)) == 0) + ospace("seekc"); + } + hptr->beg = p; + hptr->wt = hptr->last = hptr->rd = p+n; + return; + } + hptr->rd = nn; + if(nn>hptr->wt)hptr->wt = nn; + return; +} + +void +salterwd(struct wblk *hptr,struct blk *n) +{ + if(hptr->rdw == hptr->lastw)more((struct blk *)hptr); + *hptr->rdw++ = n; + if(hptr->rdw > hptr->wtw)hptr->wtw = hptr->rdw; + return; +} + +void +more(struct blk *hptr) +{ + register unsigned size; + register char *p; + + if((size=(hptr->last-hptr->beg)*2) == 0)size=1; + nbytes += size/2; + /*free(hptr->beg);*/ + p = realloc(hptr->beg, (unsigned)size); + if(p == 0){ + hptr->beg = realloc(hptr->beg, (unsigned)(hptr->last-hptr->beg)); + garbage("more"); + if((p = realloc(hptr->beg,size)) == 0) + ospace("more"); + } + hptr->rd = hptr->rd-hptr->beg+p; + hptr->wt = hptr->wt-hptr->beg+p; + hptr->beg = p; + hptr->last = p+size; + return; +} + +void +ospace(char *s) +{ + printf("out of space: %s\n",s); + printf("all %ld rel %ld headmor %ld\n",all,rel,headmor); + printf("nbytes %ld\n",nbytes); + sdump("stk",*stkptr); + abort(); +} + +void +garbage(char *s) +{ + int i; + struct blk *p, *q; + struct sym *tmps; + int ct; + +/* printf("got to garbage %s\n",s); */ + for(i=0;i<TBLSZ;i++){ + tmps = stable[i]; + if(tmps != 0){ + if(i < ARRAYST){ + do { + p = tmps->val; + if(((intptr_t)p->beg & 01) != 0){ + printf("string %o\n",i); + sdump("odd beg",p); + } + redef(p); + tmps = tmps->next; + } while(tmps != 0); + continue; + } + else { + do { + p = tmps->val; + rewind(p); + ct = 0; + while((q = dcgetwd(p)) != NULL){ + ct++; + if(q != 0){ + if(((intptr_t)q->beg & 01) != 0){ + printf("array %o elt %d odd\n",i-ARRAYST,ct); +printf("tmps %lo p %lo\n",(long)(intptr_t)tmps,(long)(intptr_t)p); + sdump("elt",q); + } + redef(q); + } + } + tmps = tmps->next; + } while(tmps != 
0); + } + } + } +} + +/* + * Reallocate p's buffer at its current size and re-point rd, wt and + * last into the (possibly moved) new buffer. Called from garbage(). + */ +void +redef(struct blk *p) +{ + register char *newp; + long size, rdoff, wtoff; + + if ((intptr_t)p->beg&01) { + printf("odd ptr %lo hdr %lo\n",(long)(intptr_t)p->beg, + (long)(intptr_t)p); + ospace("redef-bad"); + } + /* take the offsets before realloc: the old pointer value may not be used afterwards, and a pointer difference need not fit in an int */ + size = p->last - p->beg; + rdoff = p->rd - p->beg; + wtoff = p->wt - p->beg; + /*free(p->beg);*/ + /* never ask for 0 bytes: realloc(ptr, 0) may release ptr and return NULL */ + newp = realloc(p->beg, (size_t)(size ? size : 1)); + if(newp == NULL)ospace("redef"); + p->beg = newp; + p->rd = newp + rdoff; + p->wt = newp + wtoff; + p->last = newp + size; +} + +void +release(register struct blk *p) +{ + rel++; + nbytes -= p->last - p->beg; + p->rd = (char *)hfree; + hfree = p; + free(p->beg); +} + +struct blk * +dcgetwd(struct blk *p) +{ + register struct wblk *wp; + + wp = (struct wblk *)p; + if (wp->rdw == wp->wtw) + return(NULL); + return(*wp->rdw++); +} + +void +putwd(struct blk *p, struct blk *c) +{ + register struct wblk *wp; + + wp = (struct wblk *)p; + if (wp->wtw == wp->lastw) + more(p); + *wp->wtw++ = c; +} + +struct blk * +lookwd(struct blk *p) +{ + register struct wblk *wp; + + wp = (struct wblk *)p; + if (wp->rdw == wp->wtw) + return(NULL); + return(*wp->rdw); +} + +char * +nalloc(register char *p,unsigned nbytes) +{ + register char *q, *r; + q = r = malloc(nbytes ? nbytes : 1); + if(q==0) + return(0); + while(nbytes--) + *q++ = *p++; + return(r); +} + +void * +srealloc(void *op, size_t size) +{ + void *np; + + if ((np = realloc(op, size)) == 0) { + write(2, "no memory\n", 10); + _exit(077); + } + return np; +} diff --git a/dc/dc.h b/dc/dc.h @@ -0,0 +1,203 @@ +/* from Unix 7th Edition /usr/src/cmd/dc/dc.h */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* Sccsid @(#)dc.h 1.9 (gritter) 2/4/05> */ + +#include <stdlib.h> +#include <signal.h> + +#define FATAL 0 +#define NFATAL 1 +#define BLK sizeof(struct blk) +#define PTRSZ sizeof(int *) +#define HEADSZ 1024 +#define STKSZ 100 +#define RDSKSZ 100 +#define TBLSZ 256 +#define ARRAYST 0241 +#define NL 1 +#define NG 2 +#define NE 3 +#define length(p) ((p)->wt-(p)->beg) +#define rewind(p) (p)->rd=(p)->beg +#define create(p) (p)->rd = (p)->wt = (p)->beg +#define fsfile(p) (p)->rd = (p)->wt +#define truncate(p) (p)->wt = (p)->rd +#define sfeof(p) (((p)->rd>=(p)->wt)?1:0) +#define sfbeg(p) (((p)->rd==(p)->beg)?1:0) +#define sungetc(p,c) *(--(p)->rd)=c +#ifdef interdata +#define NEGBYTE 0200 +#define MASK (-1 & ~0377) +#define sgetc(p) ( ((p)->rd==(p)->wt) ? EOF :( ((*(p)->rd & NEGBYTE) != 0) ? ( *(p)->rd++ | MASK): *(p)->rd++ )) +#define slookc(p) ( ((p)->rd==(p)->wt) ? EOF :( ((*(p)->rd & NEGBYTE) != 0) ? (*(p)->rd | MASK) : *(p)->rd )) +#define sbackc(p) ( ((p)->rd==(p)->beg) ? EOF :( ((*(--(p)->rd) & NEGBYTE) != 0) ? 
(*(p)->rd | MASK): *(p)->rd )) +#endif +#ifndef interdata +#define sgetc(p) (((p)->rd==(p)->wt)?EOF:*(p)->rd++) +#define slookc(p) (((p)->rd==(p)->wt)?EOF:*(p)->rd) +#define sbackc(p) (((p)->rd==(p)->beg)?EOF:*(--(p)->rd)) +#endif +#define sputc(p,c) {if((p)->wt==(p)->last)more(p); *(p)->wt++ = c; } +#define salterc(p,c) {if((p)->rd==(p)->last)more(p); *(p)->rd++ = c; if((p)->rd>(p)->wt)(p)->wt=(p)->rd;} +#define sunputc(p) (*( (p)->rd = --(p)->wt)) +#define zero(p) for(pp=(p)->beg;pp<(p)->last;)*pp++='\0' +#define OUTC(x) {int _c = (x); if (_c) {printf("%c",_c); if(--count == 0){printf("\\\n"); count=ll;} } } +#define TEST2(b) { OUTC(b[0] & 0377); OUTC(b[1] & 0377); } +#define EMPTY if(stkerr != 0){printf("stack empty\n"); continue; } +#define EMPTYR(x) if(stkerr!=0){pushp(x);printf("stack empty\n");continue;} +#define EMPTYS if(stkerr != 0){printf("stack empty\n"); return(1);} +#define EMPTYSR(x) if(stkerr !=0){printf("stack empty\n");pushp(x);return(1);} +#define error(p) {printf(p); continue; } +#define errorrt(p) {printf(p); return(1); } +struct blk { + char *rd; + char *wt; + char *beg; + char *last; +}; +struct blk *hfree; +struct blk *arg1, *arg2; +int svargc; +char savk; +char **svargv; +int dbg; +int ifile; +FILE *curfile; +struct blk *scalptr, *basptr, *tenptr, *inbas; +struct blk *sqtemp, *chptr, *strptr, *divxyz; +struct blk *stack[STKSZ]; +struct blk **stkptr,**stkbeg; +struct blk **stkend; +int stkerr; +int lastchar; +struct blk *readstk[RDSKSZ]; +struct blk **readptr; +struct blk *rem; +int k; +struct blk *irem; +int skd,skr; +int neg; +struct sym { + struct sym *next; + struct blk *val; +} symlst[TBLSZ]; +struct sym *stable[TBLSZ]; +struct sym *sptr,*sfree; +struct wblk { + struct blk **rdw; + struct blk **wtw; + struct blk **begw; + struct blk **lastw; +}; +FILE *fsave; +long rel; +long nbytes; +long all; +long headmor; +long obase; +int fw,fw1,ll; +int (*outdit)(struct blk *, int, int); +int logo; +int log_10; +int count; +char *pp; +char *dummy; 
+ +#define div(a, b) dcdiv(a, b) +#define sqrt(a) dcsqrt(a) +#define exp(a, b) dcexp(a, b) +#define getwd(a) dcgetwd(a) +extern void commnds(void); +extern struct blk *div(struct blk *, struct blk *); +extern int dscale(void); +extern struct blk *removr(struct blk *, int); +extern struct blk *sqrt(struct blk *); +extern struct blk *exp(struct blk *, struct blk *); +extern void init(int, char *[]); +extern void onintr(int); +extern void pushp(struct blk *); +extern struct blk *pop(void); +extern struct blk *readin(void); +extern struct blk *add0(struct blk *, int); +extern struct blk *mult(struct blk *, struct blk *); +extern void chsign(struct blk *); +extern int readc(void); +extern void unreadc(char); +extern void binop(char); +extern void print(struct blk *); +extern struct blk *getdec(struct blk *, int); +extern void tenot(struct blk *, int); +extern void oneot(struct blk *, int, char); +extern void hexot(struct blk *, int, int); +extern void bigot(struct blk *, int, int); +extern struct blk *add(struct blk *, struct blk *); +extern int eqk(void); +extern struct blk *removc(struct blk *, int); +extern struct blk *scalint(struct blk *); +extern struct blk *scale(struct blk *, int); +extern int subt(void); +extern int command(void); +extern int cond(char); +extern void load(void); +extern int log_2(long); +extern struct blk *salloc(int); +extern struct blk *morehd(void); +extern struct blk *copy(struct blk *, int); +extern void sdump(char *, struct blk *); +extern void seekc(struct blk *, int); +extern void salterwd(struct wblk *, struct blk *); +extern void more(struct blk *); +extern void ospace(char *); +extern void garbage(char *); +extern void redef(struct blk *); +extern void release(register struct blk *); +extern struct blk *getwd(struct blk *); +extern void putwd(struct blk *, struct blk *); +extern struct blk *lookwd(struct blk *); +extern char *nalloc(register char *, unsigned); +extern void *srealloc(void *, size_t); + +#if defined (__GLIBC__) && 
defined (_IO_getc_unlocked) +#undef getc +#define getc(f) _IO_getc_unlocked(f) +#endif + +#ifndef BC_BASE_MAX +#define BC_BASE_MAX 99 +#endif +#ifndef BC_DIM_MAX +#define BC_DIM_MAX 2048 +#endif diff --git a/dc/mkfile b/dc/mkfile @@ -0,0 +1,8 @@ +BIN = dc +OBJ = dc.o +LOCAL_CFLAGS = -DSHELL=\"$SHELL\" +INSTALL_BIN = dc +INSTALL_MAN1 = dc.1 +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/dc/version.c b/dc/version.c @@ -0,0 +1,13 @@ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)dc.sl 2.12 (gritter) 12/25/06"; +/* SLIST */ +/* +dc.c: Sccsid @(#)dc.c 1.21 (gritter) 12/25/06> +dc.h: Sccsid @(#)dc.h 1.9 (gritter) 2/4/05> +*/ diff --git a/dd/dd.1 b/dd/dd.1 @@ -0,0 +1,293 @@ +.\" +.\" Sccsid @(#)dd.1 1.7 (gritter) 1/14/05 +.\" +.\" Parts taken from dd(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. 
nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH DD 1 "1/14/05" "Heirloom Toolchest" "User Commands" +.SH NAME +dd \- convert and copy a file +.SH SYNOPSIS +.B dd +[option=value] ... +.SH DESCRIPTION +.I Dd +copies the specified input file +to the specified output with +possible conversions. +The standard input and output are used by default. +The input and output block size may be +specified to take advantage of raw physical I/O. 
+.PP +.br +.ns +.TP 15 +.I option +.I values +.br +.ns +.TP +if= +input file name; standard input is default +.br +.ns +.TP +of= +output file name; standard output is default +.br +.ns +.TP +.RI ibs= n +input block size +.I n +bytes (default 512) +.br +.ns +.TP +.RI obs= n +output block size (default 512) +.br +.ns +.TP +.RI bs= n +set both input and output block size, +superseding +.I ibs +and +.I obs; +also, if no conversion is specified, +it is particularly efficient since no copy need be done +.br +.ns +.TP +.RI cbs= n +conversion buffer size +.br +.ns +.TP +.RI skip= n +skip +.IR n "" +input records before starting copy +.br +.ns +.TP +.RI iseek= n +seek +.IR n "" +input records before starting copy +.br +.ns +.TP +.RI files= n +copy +.I n +files from (tape) input +.br +.ns +.TP +.RI seek= n +seek +.I n +records from beginning of output file before copying +.br +.ns +.TP +.RI oseek= n +same as seek +.br +.ns +.TP +count=\fIn\fR +copy only +.IR n "" +input records +.br +.ns +.TP +conv=ascii +.ds h \h'\w'conv='u' +convert EBCDIC to ASCII +.br +.ns +.IP \*hebcdic +convert ASCII to EBCDIC +.br +.ns +.IP \*hibm +slightly different map of ASCII to EBCDIC +.br +.ns +.IP \*hblock +convert newline-terminated input lines to blocks +.br +.ns +.IP \*hunblock +convert blocked input to lines +.br +.ns +.IP \*hlcase +map alphabetics to lower case +.br +.ns +.IP \*hucase +map alphabetics to upper case +.br +.ns +.IP \*hswab +swap every pair of bytes +.br +.ns +.IP \*hnoerror +do not stop processing on an error +.br +.ns +.IP \*hnotrunc +do not truncate the output file +.br +.ns +.IP \*hsync +pad every input record to +.I ibs +.br +.ns +.IP "\*h... , ..." +several comma-separated conversions +.PP +.fi +Where sizes are specified, +a number of bytes is expected. +A number may end with +.B "k, b" +or +.B w +to specify multiplication by +1024, 512, or 2 respectively; +a pair of numbers may be separated by +.B x +to indicate a product. 
+.PP +.I Cbs +is used only if +.IR ascii , +.IR unblock, +.IR ebcdic , +.IR ibm , +or +.IR block +conversion is specified. +In the first two cases, +.I cbs +bytes are placed into the conversion buffer, converted to +ASCII, and trailing blanks trimmed and new-line added +before sending the line to the output. +In the latter three cases, +ASCII characters (bytes) are read into the +conversion buffer, converted to EBCDIC, and blanks added +to make up an +output record of size +.IR cbs . +.PP +Two additional values for the `conv' +option, `conv=idirect' and `conv=odirect', +are available as extensions. +They enable direct i/o on input or output, respectively. +See the description of the `O_DIRECT' flag in +.IR open (2) +for more information. +`conv=odirect' must be used with care +as it requires padding for correct operation; +a write that is not a multiple of an acceptable buffer size will fail. +This is particularly of concern for the last block written. +Using `conv=odirect' thus usually requires `conv=sync' +and cannot be used if such padding destroys the file integrity. +.PP +After completion, +.I dd +reports the number of whole and partial input and output +blocks. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +for `conv=lcase' and `conv=ucase'. +.SH EXAMPLES +To read an EBCDIC tape blocked ten 80-byte +EBCDIC card images per record into the ASCII file +.IR x : +.IP "" +dd if=/dev/rmt0 of=x ibs=800 cbs=80 conv=ascii,lcase +.PP +Note the use of raw magtape. +.I Dd +is especially suited to I/O on the raw +physical devices because it allows reading +and writing in arbitrary record sizes. 
+.PP +To skip over a file before copying from magnetic tape do +.IP "" +(dd of=/dev/null; dd of=x) </dev/rmt0 +.SH "SEE ALSO" +cp(1), +tr(1), +locale(7) +.SH DIAGNOSTICS +f+p records in(out): numbers of full and partial records read(written) +.SH NOTES +The ASCII/EBCDIC conversion tables are +taken +.\" This was valid for v7/BSD conversion tables. +.\"from the 256 character standard in +.\"the CACM Nov, 1968. +.\" *** +.\" This is what various AT&T sources and mem(3) of libast say for the +.\" current tables. The tables themselves are given in the POSIX.2 rationale. +from a proposed BTL standard April 16, 1979. +The `ibm' conversion, while less blessed as a standard, +corresponds better to certain IBM print train conventions. +There is no universal solution. +.PP +When reading from pipes, FIFOs, character devices (e.\|g. terminals), +or network sockets, +partial input records can occur at any time +even before the end of the data stream is reached. +For the `count' option, +these are handled exactly like full records. +Using +.I dd +to retrieve exactly +.IR count * ibs +bytes from such files thus does not generally work unless `ibs=1'. diff --git a/dd/dd.c b/dd/dd.c @@ -0,0 +1,1035 @@ +/* + * dd - convert and copy + * + * Gunnar Ritter, Freiburg i. Br., Germany, January 2003. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. 
+ * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)dd.sl 1.30 (gritter) 1/22/06"; + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <malloc.h> +#include <errno.h> +#include <libgen.h> +#include <ctype.h> +#include <locale.h> +#include <signal.h> +#include "sigset.h" +#include <wchar.h> +#include <wctype.h> +#include <limits.h> + +#include <sys/ioctl.h> + +#if defined (__linux__) || defined (__sun) || defined (__FreeBSD__) || \ + defined (__hpux) || defined (_AIX) || defined (__NetBSD__) || \ + defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__) +#include <sys/mtio.h> +#else /* SVR4.2MP */ +#include <sys/scsi.h> +#include <sys/st01.h> +#endif /* SVR4.2MP */ + +#include "atoll.h" +#include "memalign.h" +#include "mbtowi.h" + +/* + * For 'conv=ascii'. 
/*
 * Byte translation table for 'conv=ascii'.
 * Indexed by the input byte value (see ascconv(), which looks up
 * c_ascii[*data & 0377]); each entry is the replacement byte.
 */
static const unsigned char	c_ascii[256] = {
0000,0001,0002,0003,0234,0011,0206,0177,0227,0215,0216,0013,0014,0015,0016,0017,
0020,0021,0022,0023,0235,0205,0010,0207,0030,0031,0222,0217,0034,0035,0036,0037,
0200,0201,0202,0203,0204,0012,0027,0033,0210,0211,0212,0213,0214,0005,0006,0007,
0220,0221,0026,0223,0224,0225,0226,0004,0230,0231,0232,0233,0024,0025,0236,0032,
0040,0240,0241,0242,0243,0244,0245,0246,0247,0250,0325,0056,0074,0050,0053,0174,
0046,0251,0252,0253,0254,0255,0256,0257,0260,0261,0041,0044,0052,0051,0073,0176,
0055,0057,0262,0263,0264,0265,0266,0267,0270,0271,0313,0054,0045,0137,0076,0077,
0272,0273,0274,0275,0276,0277,0300,0301,0302,0140,0072,0043,0100,0047,0075,0042,
0303,0141,0142,0143,0144,0145,0146,0147,0150,0151,0304,0305,0306,0307,0310,0311,
0312,0152,0153,0154,0155,0156,0157,0160,0161,0162,0136,0314,0315,0316,0317,0320,
0321,0345,0163,0164,0165,0166,0167,0170,0171,0172,0322,0323,0324,0133,0326,0327,
0330,0331,0332,0333,0334,0335,0336,0337,0340,0341,0342,0343,0344,0135,0346,0347,
0173,0101,0102,0103,0104,0105,0106,0107,0110,0111,0350,0351,0352,0353,0354,0355,
0175,0112,0113,0114,0115,0116,0117,0120,0121,0122,0356,0357,0360,0361,0362,0363,
0134,0237,0123,0124,0125,0126,0127,0130,0131,0132,0364,0365,0366,0367,0370,0371,
0060,0061,0062,0063,0064,0065,0066,0067,0070,0071,0372,0373,0374,0375,0376,0377
};

/*
 * Byte translation table for 'conv=ibm'.
 * Same layout as c_ascii: index with the input byte, read the output byte.
 */
static const unsigned char	c_ibm[256] = {
0000,0001,0002,0003,0067,0055,0056,0057,0026,0005,0045,0013,0014,0015,0016,0017,
0020,0021,0022,0023,0074,0075,0062,0046,0030,0031,0077,0047,0034,0035,0036,0037,
0100,0132,0177,0173,0133,0154,0120,0175,0115,0135,0134,0116,0153,0140,0113,0141,
0360,0361,0362,0363,0364,0365,0366,0367,0370,0371,0172,0136,0114,0176,0156,0157,
0174,0301,0302,0303,0304,0305,0306,0307,0310,0311,0321,0322,0323,0324,0325,0326,
0327,0330,0331,0342,0343,0344,0345,0346,0347,0350,0351,0255,0340,0275,0137,0155,
0171,0201,0202,0203,0204,0205,0206,0207,0210,0211,0221,0222,0223,0224,0225,0226,
0227,0230,0231,0242,0243,0244,0245,0246,0247,0250,0251,0300,0117,0320,0241,0007,
0040,0041,0042,0043,0044,0025,0006,0027,0050,0051,0052,0053,0054,0011,0012,0033,
0060,0061,0032,0063,0064,0065,0066,0010,0070,0071,0072,0073,0004,0024,0076,0341,
0101,0102,0103,0104,0105,0106,0107,0110,0111,0121,0122,0123,0124,0125,0126,0127,
0130,0131,0142,0143,0144,0145,0146,0147,0150,0151,0160,0161,0162,0163,0164,0165,
0166,0167,0170,0200,0212,0213,0214,0215,0216,0217,0220,0232,0233,0234,0235,0236,
0237,0240,0252,0253,0254,0255,0256,0257,0260,0261,0262,0263,0264,0265,0266,0267,
0270,0271,0272,0273,0274,0275,0276,0277,0312,0313,0314,0315,0316,0317,0332,0333,
0334,0335,0336,0337,0352,0353,0354,0355,0356,0357,0372,0373,0374,0375,0376,0377
};

/*
 * Byte translation table for 'conv=ebcdic'.
 * Same layout as c_ascii: index with the input byte, read the output byte.
 */
static const unsigned char	c_ebcdic[256] = {
0000,0001,0002,0003,0067,0055,0056,0057,0026,0005,0045,0013,0014,0015,0016,0017,
0020,0021,0022,0023,0074,0075,0062,0046,0030,0031,0077,0047,0034,0035,0036,0037,
0100,0132,0177,0173,0133,0154,0120,0175,0115,0135,0134,0116,0153,0140,0113,0141,
0360,0361,0362,0363,0364,0365,0366,0367,0370,0371,0172,0136,0114,0176,0156,0157,
0174,0301,0302,0303,0304,0305,0306,0307,0310,0311,0321,0322,0323,0324,0325,0326,
0327,0330,0331,0342,0343,0344,0345,0346,0347,0350,0351,0255,0340,0275,0232,0155,
0171,0201,0202,0203,0204,0205,0206,0207,0210,0211,0221,0222,0223,0224,0225,0226,
0227,0230,0231,0242,0243,0244,0245,0246,0247,0250,0251,0300,0117,0320,0137,0007,
0040,0041,0042,0043,0044,0025,0006,0027,0050,0051,0052,0053,0054,0011,0012,0033,
0060,0061,0032,0063,0064,0065,0066,0010,0070,0071,0072,0073,0004,0024,0076,0341,
0101,0102,0103,0104,0105,0106,0107,0110,0111,0121,0122,0123,0124,0125,0126,0127,
0130,0131,0142,0143,0144,0145,0146,0147,0150,0151,0160,0161,0162,0163,0164,0165,
0166,0167,0170,0200,0212,0213,0214,0215,0216,0217,0220,0152,0233,0234,0235,0236,
0237,0240,0252,0253,0254,0112,0256,0257,0260,0261,0262,0263,0264,0265,0266,0267,
0270,0271,0272,0273,0274,0241,0276,0277,0312,0313,0314,0315,0316,0317,0332,0333,
0334,0335,0336,0337,0352,0353,0354,0355,0356,0357,0372,0373,0374,0375,0376,0377
};

/* Program name for diagnostics: argv[0] as passed to main(). */
static char	*progname;

/* Integer type used for all block sizes and record counters. */
typedef	long long	d_type;

/* Data buffers. */
static char	*iblok;			/* input buffer */
static char	*oblok;			/* output buffer */
static char	*cblok;			/* conversion buffer */

/* Hold-over storage for multibyte case conversion. */
static char	mblok[MB_LEN_MAX+1];	/* tow{upper|lower} buffer */
static char	*mbp;			/* points to remaining chars in mblok */
static int	mbrest;			/* number of remaining chars in mblok */

/* Input and output files. */
static const char	*iffile;	/* input file name */
static int	iffd;			/* input file descriptor */
static const char	*offile;	/* output file name */
static int	offd;			/* output file descriptor */
static struct stat	istat;		/* stat of input */
+static struct stat ostat; /* stat of output */ +static d_type ibs = 512; /* input block size */ +static d_type obs = 512; /* output block size */ +static d_type bs; /* size for both buffers */ +static d_type oflow; /* remaining bytes in output buffer */ +static d_type cbs; /* conversion block size */ +static d_type cflow; /* remaining bytes in conv. buffer */ +static int ctrunc; /* truncate current data (conv=block) */ +static d_type skip; /* skip these blocks on input */ +static d_type count = -1; /* no more than count blocks of input */ +static int files = 1; /* read EOF this many times */ +static d_type iseek; /* seek these blocks on input */ +static d_type oseek; /* seek these blocks on output */ +static int mb_cur_max; /* MB_CUR_MAX acceleration */ + +static d_type iwhole; /* statistics */ +static d_type ipartial; +static d_type owhole; +static d_type opartial; +static d_type truncated; + +static enum charconv { + CHAR_NONE = 0, + CHAR_ASCII = 1, + CHAR_EBCDIC = 2, + CHAR_IBM = 3 +} chars = CHAR_NONE; + +static enum conversion { + CONV_NONE = 0, + CONV_BLOCK = 01, + CONV_UNBLOCK = 02, + CONV_LCASE = 04, + CONV_UCASE = 010, + CONV_SWAB = 020, + CONV_NOERROR = 040, + CONV_NOTRUNC = 0100, + CONV_IDIRECT = 0200, + CONV_ODIRECT = 0400, + CONV_DIRECT = 0600, + CONV_SYNC = 01000 +} convs = CONV_NONE; + +static struct { + const char *c_name; + enum conversion c_conv; + enum charconv c_char; +} convtab[] = { + { "ascii", CONV_UNBLOCK, CHAR_ASCII }, + { "ebcdic", CONV_BLOCK, CHAR_EBCDIC }, + { "ibm", CONV_BLOCK, CHAR_IBM }, + { "block", CONV_BLOCK, CHAR_NONE }, + { "unblock", CONV_UNBLOCK, CHAR_NONE }, + { "lcase", CONV_LCASE, CHAR_NONE }, + { "ucase", CONV_UCASE, CHAR_NONE }, + { "swab", CONV_SWAB, CHAR_NONE }, + { "noerror", CONV_NOERROR, CHAR_NONE }, + { "notrunc", CONV_NOTRUNC, CHAR_NONE }, +#ifdef O_DIRECT + { "idirect", CONV_IDIRECT, CHAR_NONE }, + { "odirect", CONV_ODIRECT, CHAR_NONE }, +#endif /* O_DIRECT */ + { "sync", CONV_SYNC, CHAR_NONE }, + { NULL, 
CONV_NONE, CHAR_NONE } +}; + +static void * +bmalloc(size_t nbytes) +{ + static long pagesize; + void *vp; + + if (pagesize == 0) + if ((pagesize = sysconf(_SC_PAGESIZE)) < 0) + pagesize = 4096; + if ((vp = memalign(pagesize, nbytes)) == NULL) { + fprintf(stderr, "%s: not enough memory\n", progname); + fprintf(stderr, "Please use a smaller buffer size\n"); + exit(077); + } + return vp; +} + +/************************** ARGUMENT SCANNING ***************************/ +static void +badarg(const char *arg) +{ + fprintf(stderr, "%s: bad arg: \"%s\"\n", progname, arg); + exit(2); +} + +static void +badnumeric(const char *arg) +{ + fprintf(stderr, "%s: bad numeric arg: \"%s\"\n", progname, arg); + exit(2); +} + +static void +nozeroblok(void) +{ + fprintf(stderr, "%s: buffer sizes cannot be zero\n", progname); + exit(2); +} + +/* + * Get the value of a numeric argument. + */ +static d_type +expr(const char *ap) +{ + d_type val; + char *x; + int c; + + if (*ap == '-' || *ap == '+') + badnumeric(ap); + val = strtoull(ap, &x, 10); + while ((c = *x++) != '\0') { + switch (c) { + case 'k': + val *= 1024; + break; + case 'b': + val *= 512; + break; + case 'w': + val *= 2; + break; + case 'x': + case '*': + return val * expr(x); + default: + badnumeric(ap); + } + } + return val; +} + +static void +setin(const char *ap) +{ + iffile = ap; +} + +static void +setof(const char *ap) +{ + offile = ap; +} + +static void +setibs(const char *ap) +{ + ibs = expr(ap); + if (ibs == 0) + nozeroblok(); +} + +static void +setobs(const char *ap) +{ + obs = expr(ap); + if (obs == 0) + nozeroblok(); +} + +static void +setbs(const char *ap) +{ + bs = expr(ap); +} + +static void +setcbs(const char *ap) +{ + cbs = expr(ap); +} + +static void +setskip(const char *ap) +{ + skip = expr(ap); +} + +static void +setcount(const char *ap) +{ + count = expr(ap); +} + +static void +setconv(const char *ap) +{ + const char *cp, *cq; + int i; + + for (;;) { + while (*ap == ',') + ap++; + if (*ap == '\0') + break; 
+ for (i = 0; convtab[i].c_name; i++) { + for (cp = convtab[i].c_name, cq = ap; + *cp && (*cp == *cq); + cp++, cq++); + if (*cp == '\0' && (*cq == ',' || *cq == '\0')) { + convs |= convtab[i].c_conv; + if (convtab[i].c_char != CHAR_NONE) + chars = convtab[i].c_char; + ap = cq; + goto next; + } + } + badarg(ap); + next:; + } +} + +static void +setfiles(const char *ap) +{ + files = expr(ap); +} + +static void +setiseek(const char *ap) +{ + iseek = expr(ap); +} + +static void +setoseek(const char *ap) +{ + oseek = expr(ap); +} + +static struct { + const char *a_name; + void (*a_func)(const char *); +} argtab[] = { + { "if=", setin }, + { "of=", setof }, + { "ibs=", setibs }, + { "obs=", setobs }, + { "bs=", setbs }, + { "cbs=", setcbs }, + { "skip=", setskip }, + { "seek=", setoseek }, + { "count=", setcount }, + { "conv=", setconv }, + { "files=", setfiles }, + { "iseek=", setiseek }, + { "oseek=", setoseek }, + { NULL, NULL } +}; + +static const char * +thisarg(const char *sp, const char *ap) +{ + do { + if (*sp != *ap) + return NULL; + if (*sp == '=') + return &sp[1]; + } while (*sp++ && *ap++); + return NULL; +} + +/******************************* EXECUTION ********************************/ +static void +stats(void) +{ + fprintf(stderr, "%llu+%llu records in\n", + (unsigned long long)iwhole, + (unsigned long long)ipartial); + fprintf(stderr, "%llu+%llu records out\n", + (unsigned long long)owhole, + (unsigned long long)opartial); + if (truncated) { + fprintf(stderr, "%llu truncated record%s\n", + (unsigned long long)truncated, + truncated > 1 ? 
"s" : ""); + } +} + +static void charconv(char *data, size_t size); +static void bflush(void); +static void cflush(void); +static void uflush(void); + +static void +quit(int status) +{ + if (mbp) + charconv(NULL, 0); + cflush(); + uflush(); + bflush(); + stats(); + exit(status); +} + +static void +onint(int sig) +{ + stats(); + exit(sig | 0200); +} + +static int +ontape(void) +{ + static int yes = -1; + + if (yes == -1) { +#if defined (__linux__) || defined (__FreeBSD__) || defined (__hpux) || \ + defined (_AIX) || defined (__NetBSD__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) + struct mtget mg; + yes = (istat.st_mode&S_IFMT) == S_IFCHR && + ioctl(iffd, MTIOCGET, &mg) == 0; +#elif defined (__sun) + struct mtdrivetype_request mr; + struct mtdrivetype md; + mr.size = sizeof md; + mr.mtdtp = &md; + yes = (istat.st_mode&S_IFMT) == S_IFCHR && + ioctl(iffd, MTIOCGETDRIVETYPE, &mr) == 0; +#else /* SVR4.2MP */ + struct blklen bl; + yes = (istat.st_mode&S_IFMT) == S_IFCHR && + ioctl(iffd, T_RDBLKLEN, &bl) == 0; +#endif /* SVR4.2MP */ + } + return yes; +} + +static void +seekconv(d_type count) +{ + ssize_t sz; + off_t offs; + + if (lseek(offd, 0, SEEK_CUR) != (off_t)-1) { + do { + if ((offs = lseek(offd, obs, SEEK_CUR)) == (off_t)-1) { + err: fprintf(stderr, "%s: output seek error: %s\n", + progname, strerror(errno)); + exit(3); + } + } while (--count); + if ((convs & CONV_NOTRUNC) == 0 && + (ostat.st_mode&S_IFMT) == S_IFREG) + ftruncate(offd, offs); + return; + } + while (count) { + if ((sz = read(offd, oblok, obs)) == 0) + break; + if (sz < 0) + goto err; + count--; + } + if (count) { + memset(oblok, 0, obs); + do { + if ((sz = write(offd, oblok, obs)) < 0) + goto err; + } while (--count); + } +} + +static void +skipconv(int canseek, d_type count) +{ + ssize_t rd = 0; + + if (canseek && lseek(iffd, 0, SEEK_CUR) == (off_t)-1) + canseek = 0; + while (count--) { + if (canseek) { + if (lseek(iffd, ibs, SEEK_CUR) != (off_t)-1) + rd = ibs; + 
else if (errno == EINVAL) + rd = 0; + else { + fprintf(stderr, "%s: input seek error: %s\n", + progname, strerror(errno)); + exit(3); + } + } else { + if ((rd = read(iffd, iblok, ibs)) < 0) { + fprintf(stderr, + "%s: read error during skip: %s\n", + progname, strerror(errno)); + exit(3); + } + } + if (rd == 0 && files-- <= 1) { + fprintf(stderr, "%s: cannot skip past end-of-file\n", + progname); + exit(3); + } + } +} + +static void +prepare(void) +{ + int flags; + + if (bs) + ibs = obs = bs; + iblok = bmalloc(ibs); + if (!(bs && chars == CHAR_NONE && + (convs|CONV_SYNC|CONV_NOERROR|CONV_NOTRUNC|CONV_DIRECT) + == (CONV_SYNC|CONV_NOERROR|CONV_NOTRUNC|CONV_DIRECT))) + oblok = bmalloc(obs); + if (cbs > 0) { + if ((convs & (CONV_BLOCK|CONV_UNBLOCK)) == 0) { + fprintf(stderr, + "%s: cbs must be zero if no block conversion requested\n", + progname); + exit(2); + } + cblok = bmalloc(cbs + 1); + } else + convs &= ~(CONV_BLOCK|CONV_UNBLOCK); + if ((iffd = iffile ? open(iffile, O_RDONLY) : dup(0)) < 0) { + fprintf(stderr, "%s: cannot open %s: %s\n", progname, + iffile ? iffile : "", strerror(errno)); + exit(1); + } + fstat(iffd, &istat); +#ifdef O_DIRECT + if (convs & CONV_IDIRECT) { + int flags; + flags = fcntl(iffd, F_GETFL); + fcntl(iffd, F_SETFL, flags | O_DIRECT); + } +#endif /* O_DIRECT */ + if (skip) + skipconv(0, skip); + else if (iseek) + skipconv(1, iseek); + flags = O_RDWR | O_CREAT; + if ((convs & CONV_NOTRUNC) == 0 && oseek == 0) + flags |= O_TRUNC; + if ((offd = offile ? open(offile, flags, 0666) : dup(1)) < 0) { + fprintf(stderr, "%s: cannot %s %s: %s\n", + progname, + flags & O_TRUNC ? "create" : "open", + offile ? 
offile : "", strerror(errno)); + exit(1); + } + fstat(offd, &ostat); +#ifdef O_DIRECT + if (convs & CONV_ODIRECT) { + int flags; + flags = fcntl(offd, F_GETFL); + fcntl(offd, F_SETFL, flags | O_DIRECT); + } +#endif /* O_DIRECT */ + if (oseek) + seekconv(oseek); +} + +static void +swabconv(char *data, size_t size) +{ + char c; + + while (size > 1) { + c = data[0]; + data[0] = data[1]; + data[1] = c; + size -= 2; + data += 2; + } +} + +static void +ascconv(char *data, size_t size) +{ + while (size--) { + *data = c_ascii[*data & 0377]; + data++; + } +} + +static ssize_t +swrite(const char *data, size_t size) +{ + ssize_t wt; + + for (;;) { + if ((wt = write(offd, data, size)) <= 0) { + if (errno == EINTR) + continue; + fprintf(stderr, "%s: write error: %s\n", + progname, strerror(errno)); + oflow = 0; + offd = -1; + quit(1); + } + break; + } + return wt; +} + +/* + * Write without output buffering (if bs= was specified). + */ +static void +dwrite(const char *data, size_t size) +{ + ssize_t wrt; + + do { + wrt = swrite(data, size); + if (wrt == obs) + owhole++; + else + opartial++; + data += wrt; + size -= wrt; + } while (size > 0); +} + +/* + * Write to output buffer. On short write, remaining data is kept within + * the buffer and written next time again. Might a warning be useful in + * this case? + */ +static void +bwrite(const char *data, size_t size) +{ + ssize_t wrt; + size_t di; + + while (oflow + size > obs) { + di = obs - oflow; + size -= di; + if (oflow) { + memcpy(&oblok[oflow], data, di); + wrt = swrite(oblok, obs); + } else + wrt = swrite(data, obs); + if (wrt != obs) { + memcpy(oblok, &(oflow ? 
oblok : data)[wrt], obs - wrt); + opartial++; + } else + owhole++; + oflow = obs - wrt; + data += di; + } + if (size == obs) { + if ((wrt = swrite(data, obs)) == obs) + owhole++; + else + opartial++; + size -= wrt; + data += wrt; + } + if (size) { + memcpy(&oblok[oflow], data, size); + oflow += size; + } +} + +static void +bflush(void) +{ + ssize_t wrt; + + if (offd >= 0) { + while (oflow) { + if ((wrt = swrite(oblok, oflow)) != oflow) + memcpy(oblok, &oblok[wrt], obs - wrt); + oflow -= wrt; + opartial++; + } + if (close(offd) < 0) { + fprintf(stderr, "%s: write error: %s\n", + progname, strerror(errno)); + offd = -1; + quit(1); + } + offd = -1; + } +} + +/* + * Handle conversions to EBCDIC. + */ +static void +ewrite(char *data, size_t size) +{ + char *dt = data; + size_t sz = size; + if (chars == CHAR_EBCDIC) { + while (sz--) { + *dt = c_ebcdic[*dt & 0377]; + dt++; + } + } else if (chars == CHAR_IBM) { + while (sz--) { + *dt = c_ibm[*dt & 0377]; + dt++; + } + } + bwrite(data, size); +} + +/* + * Handle 'conv=block'. + */ +static void +cflush(void) +{ + if (convs & CONV_BLOCK && cflow) { + while (cflow < cbs) + cblok[cflow++] = ' '; + ewrite(cblok, cbs); + cflow = 0; + } +} + +static void +cwrite(const char *data, size_t size) +{ + while (size) { + if (ctrunc == 0) { + cblok[cflow] = *data++; + if (cblok[cflow] == '\n') { + if (cflow == 0) + cblok[cflow++] = ' '; + cflush(); + } else if (++cflow == cbs) { + cflush(); + ctrunc = 1; + } + } else { + if (*data++ == '\n') + ctrunc = 0; + else if (ctrunc == 1) { + truncated++; + ctrunc = 2; + } + } + size--; + } +} + +/* + * Handle 'conv=unblock'. 
+ */ +static void +uflush(void) +{ + char *cp; + + if (cflow) { + for (cp = &cblok[cflow-1]; cp >= cblok && *cp == ' '; cp--); + cp[1] = '\n'; + bwrite(cblok, cp - cblok + 2); + cflow = 0; + } +} + +static void +uwrite(const char *data, size_t size) +{ + while (size) { + while (cflow < cbs) { + cblok[cflow++] = *data++; + if (--size == 0) + return; + } + uflush(); + } +} + +static void +blokconv(char *data, size_t size) +{ + switch (chars) { + case CHAR_EBCDIC: + case CHAR_IBM: + if ((convs & (CONV_BLOCK|CONV_UNBLOCK)) == 0) { + ewrite(data, size); + break; + } + /*FALLTHRU*/ + default: + if (convs & CONV_BLOCK) + cwrite(data, size); + else if (convs & CONV_UNBLOCK) + uwrite(data, size); + else + bwrite(data, size); + break; + } +} + +static void +charconv(char *data, size_t size) +{ + if (convs & (CONV_LCASE|CONV_UCASE)) { + if (mb_cur_max > 1) { + /* + * Multibyte case conversion is somewhat ugly + * with dd as there is no guarantee that a + * character fits in an input block. We need + * another intermediate therefore to store + * incomplete multibyte sequences. + */ + int i, n, len; + wint_t wc; + int flush = size == 0; + + while (size > 0 || (flush && mbrest)) { + i = 0; + if (mbrest && mbp && mbp > mblok) { + do + mblok[i] = mbp[i]; + while (i++, --mbrest); + } else if (mbp == mblok) { + i = mbrest; + mbrest = 0; + } + if (i == 0 && size) { + mblok[i++] = *data++; + size--; + } + if (mblok[0] & 0200) { + while (i < mb_cur_max && size) { + mblok[i++] = *data++; + size--; + } + if (!flush && i < mb_cur_max) { + mbp = mblok; + mbrest = i; + return; + } + if ((n = mbtowi(&wc, mblok, i)) < 0) { + len = 1; + wc = WEOF; + } else if (n == 0) + len = 1; + else + len = n; + } else { + wc = mblok[0]; + len = n = 1; + } + if (i > 0) { + mbrest = i - len; + mbp = &mblok[len]; + } else { + mbrest = 0; + mbp = NULL; + } + if (wc != WEOF) { + char new[MB_LEN_MAX + 1]; + + if (convs & CONV_LCASE) + wc = wc & ~(wchar_t)0177 ? 
+ towlower(wc) : + tolower(wc); + if (convs & CONV_UCASE) + wc = wc & ~(wchar_t)0177 ? + towupper(wc) : + toupper(wc); + if ((n = wctomb(new, wc)) > 0) + blokconv(new, n); + else + goto inv; + } else + inv: blokconv(mblok, len); + } + return; + } else { + char *dp = data; + size_t sz = size; + + while (sz--) { + if (convs & CONV_LCASE) + *dp = tolower(*dp & 0377); + if (convs & CONV_UCASE) + *dp = toupper(*dp & 0377); + dp++; + } + } + } + blokconv(data, size); +} + +static void +dd(void) +{ + ssize_t rd; + + while (count == -1 || count > 0) { + if ((rd = read(iffd, iblok, ibs)) < ibs) { + if (rd < 0) { + fprintf(stderr, "%s: read error: %s\n", + progname, strerror(errno)); + if (convs & CONV_NOERROR) { + stats(); + if (!ontape()) + lseek(iffd, ibs, SEEK_CUR); + if (convs & CONV_SYNC) + rd = 0; + else + continue; + } else + quit(1); + } else if (rd == 0) { + if (files-- <= 1) + break; + continue; + } else if (rd > 0) + ipartial++; + if (convs & CONV_SYNC) { + int c; + + c = convs&(CONV_BLOCK|CONV_UNBLOCK) ? 
' ' : 0; + memset(&iblok[rd], c, ibs - rd); + rd = ibs; + } + } else + iwhole++; + if (count > 0) + count--; + if (bs && chars == CHAR_NONE && + (convs|CONV_SYNC|CONV_NOERROR|CONV_NOTRUNC|CONV_DIRECT) + == (CONV_SYNC|CONV_NOERROR|CONV_NOTRUNC|CONV_DIRECT)) + dwrite(iblok, rd); + else { + if (convs & CONV_SWAB) + swabconv(iblok, rd); + if (chars == CHAR_ASCII) + ascconv(iblok, rd); + charconv(iblok, rd); + } + } +} + +int +main(int argc, char **argv) +{ + const char *cp; + int o, i; + + progname = basename(argv[0]); + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + if (argc > 1 && argv[1][0] == '-' && argv[1][1] == '-' && + argv[1][2] == '\0') + o = 2; + else + o = 1; + while (o < argc) { + for (i = 0; argtab[i].a_name; i++) { + if ((cp = thisarg(argv[o], argtab[i].a_name)) != 0) { + argtab[i].a_func(cp); + break; + } + } + if (argtab[i].a_name == NULL) + badarg(argv[o]); + o++; + } + if ((sigset(SIGINT, SIG_IGN)) != SIG_IGN) + sigset(SIGINT, onint); + prepare(); + dd(); + quit(0); + /*NOTREACHED*/ + return 0; +} diff --git a/dd/mkfile b/dd/mkfile @@ -0,0 +1,7 @@ +BIN = dd +OBJ = dd.o +INSTALL_BIN = dd +INSTALL_MAN1 = dd.1 +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/diff/diff.1 b/diff/diff.1 @@ -0,0 +1,493 @@ +.\" +.\" Copyright (c) 1980 Regents of the University of California. +.\" All rights reserved. The Berkeley software License Agreement +.\" specifies the terms and conditions for redistribution. +.\" +.\" from 4.3BSD diff.1 6.4 (Berkeley) 5/19/86 +.\" +.\" This code contains changes by +.\" Gunnar Ritter, Freiburg i. Br., Germany, March 2003. All rights reserved. +.\" +.\" Conditions 1, 2, and 4 and the no-warranty notice below apply +.\" to these changes. +.\" +.\" +.\" Copyright (c) 1980, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowedgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. 
+.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+.\" +.TH DIFF 1 "6/28/05" "Heirloom Toolchest" "User Commands" +.SH NAME +diff \- differential file comparator +.SH SYNOPSIS +.HP +.nh +.ad l +\fBdiff\fR +[\fB\-abBiptw\fR] +[\fB\-cefhnu\fR] +[\fB\-C\ \fInumber\fR] +[\fB\-U\ \fInumber\fR] +\fIfile1\fR \fIfile2\fR +.HP +.nh +.ad l +\fBdiff\fR +[\fB\-abBiptw\fR] +[\fB\-D\ \fIstring\fR] +\fIfile1\fR \fIfile2\fR +.HP +.nh +.ad l +\fBdiff\fR +[\fB\-abBiNptw12\fR] +[\fB\-cefhnu\fR] +[\fB\-C\ \fInumber\fR] +[\fB\-U\ \fInumber\fR] +[\fB\-lrs\fR] +[\fB\-S\ \fIname\fR] +[\fB\-x\ \fIpattern\fR] +[\fB\-X\ \fIname\fR] +\fIdirectory1\fR \fIdirectory2\fR +.br +.hy 1 +.SH DESCRIPTION +.I Diff +tells what lines must be changed in two files to bring them +into agreement. +If +.I file1 +.RI ( file2 ) +is `\-', the standard input is used. +If +.I file1 +.RI ( file2 ) +is a directory, then a file in that directory +whose file-name is the same as the file-name of +.I file2 +.RI ( file1 ) +is used. +The normal output contains lines of these forms: +.IP "" 5 +.I n1 +a +.I n3,n4 +.br +.I n1,n2 +d +.I n3 +.br +.I n1,n2 +c +.I n3,n4 +.PP +These lines resemble +.I ed +commands to convert +.I file1 +into +.IR file2 . +The numbers after the letters pertain to +.IR file2 . +In fact, by exchanging `a' for `d' and reading backward +one may ascertain equally how to convert +.I file2 +into +.IR file1 . +As in +.I ed, +identical pairs where +.I n1 += +.I n2 +or +.I n3 += +.I n4 +are abbreviated as a single number. +.PP +Following each of these lines come all the lines that are +affected in the first file flagged by `<', +then all the lines that are affected in the second file +flagged by `>'. +.TP 10 +.B \-a +causes a list of differences to be output +for all files, +even for those found to have binary content. +This option is an extension. +.TP 10 +.B \-b +causes trailing whitespace characters +to be ignored, and other +strings of whitespace to compare equal. +.TP 10 +.B \-i +ignores the case of letters. E.g., ``A'' will compare equal to ``a''. 
+.TP 10 +.B \-p +causes the name of the surrounding C function, +or, more exactly, +of the first previous unchanged line +beginning with a letter, the dollar sign, or the underscore, +to be output with each set of changes. +Implies +.I \-c +unless +.I \-u +is also present. +This option is an extension. +.TP 10 +.B \-t +will expand tabs in output lines. Normal, +.B \-c +or +.B \-u +output adds character(s) to the front of each line which may screw up +the indentation of the original source lines and make the output listing +difficult to interpret. This option will preserve the original source's +indentation. +.TP 10 +.B \-w +is similar to +.B \-b +but causes whitespace characters +to be totally ignored. +E.g., ``if\ (\ a\ ==\ b\ )'' will compare equal to ``if(a==b)''. +.TP 10 +.B \-B +causes changes that consist entirely of empty lines added or deleted +to be ignored. +This option is an extension. +.PP +The following options are mutually exclusive: +.TP 10 +.B \-c +produces a diff with three lines of context. +With +.B \-c +the output format is modified slightly: +the output begins with identification of the files involved and +their creation dates and then each change is separated +by a line with a dozen *'s. +The lines removed from +.I file1 +are marked with `\(mi '; those added to +.I file2 +are marked `+ '. Lines which are changed from one +file to the other are marked in both files with `! '. +.\".sp +.\"Changes which lie within <context> lines of each other are grouped +.\"together on output. (This is a change from the previous ``diff -c'' +.\"but the resulting output is usually much easier to interpret.) +.TP 10 +\fB\-C\ \fInumber\fR +Same as +.B \-c +but uses +.I number +of lines of context. 
+.TP 10 +\fB\-D\ \fIstring\fR +causes +.I diff +to create a merged version of +.I file1 +and +.I file2 +on the standard output, with C preprocessor controls included so that +a compilation of the result without defining \fIstring\fR is equivalent +to compiling +.I file1, +while defining +.I string +will yield +.I file2. +.TP 10 +.B \-e +produces a script of +.I "a, c" +and +.I d +commands for the editor +.I ed, +which will recreate +.I file2 +from +.IR file1 . +In connection with +.BR \-e , +the following shell program may help maintain +multiple versions of a file. +Only an ancestral file ($1) and a chain of +version-to-version +.I ed +scripts ($2,$3,...) made by +.I diff +need be on hand. +A `latest version' appears on +the standard output. +.IP +\ \ \ \ \ \ \ \ (shift; cat $*; echo \'1,$p\') \(bv ed \- $1 +.IP +Extra commands are added to the output when comparing directories with +.B \-e, +so that the result is a +.IR sh (1) +script for converting text files which are common to the two directories +from their state in +.I dir1 +to their state in +.I dir2. +.TP 10 +.B \-f +produces a script similar to that of +.B \-e, +not useful with +.I ed, +and in the opposite order. +.TP 10 +.B \-h +does a fast, half-hearted job. +It works only when changed stretches are short +and well separated, +but does work on files of unlimited length. +.TP 10 +.B \-n +produces a script similar to that of +.B \-e, +but in the opposite order and with a count of changed lines on each +insert or delete command. +.\"This is the form used by +.\".IR rcsdiff (1). +.TP 10 +.B \-u +produces a unified diff with three lines of context. +The output begins with identification of the files involved +and their creation dates, +followed by the changes +separated by `@@ \-range +range @@'. +Lines removed from +.I file1 +are marked with `\(mi', +those added to +.I file2 +are marked `+'. +This option is an extension. 
+.TP 10 +\fB\-U\ \fInumber\fR +Same as +.B \-u +but uses +.I number +of lines of context. +This option is an extension. +.PP +If both arguments are directories, +.I diff +sorts the contents of the directories by name, and then runs the +regular file +.I diff +algorithm on text files which are different. +Binary files which differ, +common subdirectories, and files which appear in only one directory +are listed. +.PP +Options when comparing directories are: +.TP 10 +.B \-l +long output format; each text file +.I diff +is piped through +.IR pr (1) +to paginate it, +other differences are remembered and summarized +after all text file differences are reported. +.TP 10 +.B \-N +causes the text of files +that exist in one directory only +to be output +as if compared to an empty file modified at 1/1/70. +This option is an extension. +.TP 10 +.B \-1 +is similar to +.IR \-N , +but causes just the text of files that exist in +.I directory1 +only to be output. +Files that exist only in +.I directory2 +are listed. +This option is an extension. +.TP 10 +.B \-2 +is similar to +.IR \-N , +but causes just the text of files that exist in +.I directory2 +only to be output. +Files that exist only in +.I directory1 +are listed. +This option is an extension. +.TP 10 +.B \-r +causes application of +.I diff +recursively to common subdirectories encountered. +.TP 10 +.B \-s +causes +.I diff +to report files which are the same, which are otherwise not mentioned. +.TP 10 +.BI \-S " name" +starts a directory +.I diff +in the middle beginning with file +.I name. +.TP 10 +.BI \-x " pattern" +excludes all file names that match +.I pattern +(as described in +.IR glob (7)) +from comparison. +If +.I pattern +matches a directory, +files below that directory are also excluded. +This option is an extension. +.TP 10 +.BI \-X " name" +excludes all file names contained in +.IR name . +This option is an extension. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). 
+.TP +.B LC_CTYPE +Sets the mapping of bytes to characters, +character case translation +and the set of whitespace characters. +.TP +.B SYSV3 +If this variable is set, +invalid options are ignored instead of being rejected, +and the text of some diagnostic messages is changed. +.SH FILES +.ta \w'/usr/5lib/diffh 'u +/tmp/d????? +.br +/usr/5lib/diffh for \fB\-h\fR +.br +diff for directory diffs +.br +pr +.SH "SEE ALSO" +bdiff(1), +cmp(1), +cc(1), +comm(1), +ed(1), +diff3(1), +patch(1), +locale(7) +.SH DIAGNOSTICS +Exit status is 0 for no differences, 1 for some, 2 for trouble. +.SH NOTES +Editing scripts produced under the +.BR \-e " or" +.BR \-f " option are naive about" +creating lines consisting of a single `\fB.\fR'. +.PP +When comparing directories with the +\fB\-b, \-w\fP, or \fB\-i\fP +options specified, +.I diff +first compares the files ala +.I cmp, +and then decides to run the +.I diff +algorithm if they are not equal. +This may cause a small amount of spurious output if the files +then turn out to be identical because the only differences are +insignificant blank string or case differences. +.PP +When +.I diff +output is used with +.IR ed (1) +or +.IR patch (1) +for file synchronization, +it is recommended that it is run in the +.I C +or another single-byte LC_CTYPE locale +since character-to-byte conversion +might otherwise hide some changes. diff --git a/diff/diff.c b/diff/diff.c @@ -0,0 +1,473 @@ +/* + * This code contains changes by + * Gunnar Ritter, Freiburg i. Br., Germany, March 2003. All rights reserved. + * + * Conditions 1, 2, and 4 and the no-warranty notice below apply + * to these changes. + * + * + * Copyright (c) 1991 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* Sccsid @(#)diff.c 1.24 (gritter) 3/27/05> */ +/* from 4.3BSD diff.c 4.6 4/3/86 */ + +#include "diff.h" +#include <unistd.h> +#include <locale.h> +#include <iblok.h> +/* + * diff - driver and subroutines + */ + +const char diff[] = "diff"; +const char diffh[] = DIFFH; +const char pr[] = "pr"; +const char *progname; +const char *argv0; + +static void usage(void); +static void xadd(const char *); +static void Xadd(const char *); + +int +main(int argc, char **argv) +{ + int i, invalid = 0; + + progname = basename(argv[0]); + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + if (getenv("SYSV3") != NULL) + sysv3 = 1; + ifdef1 = "FILE1"; ifdef2 = "FILE2"; + status = 2; + argv0 = argv[0]; + diffargv = argv; + while ((i = getopt(argc, argv, ":D:efnbBwitcC:uU:hS:rslNx:a12pX:")) + != EOF) { + switch (i) { +#ifdef notdef + case 'I': + opt = D_IFDEF; + wantelses = 0; + break; + case 'E': + opt = D_IFDEF; + wantelses = 1; + break; + case '1': + opt = D_IFDEF; + ifdef1 = optarg; + break; +#endif + case 'D': + /* -Dfoo = -E -1 -2foo */ + wantelses = 1; + ifdef1 = ""; + /* fall through */ +#ifdef notdef + case '2': +#endif + opt = D_IFDEF; + ifdef2 = optarg; + break; + case 'e': + opt = D_EDIT; + break; + case 'f': + opt = D_REVERSE; + break; + case 'n': + opt = D_NREVERSE; + break; + case 'b': + bflag = 1; + break; + case 'B': + Bflag = 1; + break; + case 'w': + wflag = 1; + break; + case 'i': + iflag = 1; + break; + case 't': + tflag = 1; + break; + case 'c': + opt = D_CONTEXT; + context = 3; + break; + case 'C': { + char *x; + + opt = D_CONTEXT; + context = strtol(optarg, &x, 10); + if (*x != '\0' || *optarg == '+' || *optarg == '-') { + fprintf(stderr, "%s: use -C num\n", progname); + done(); + } + break; + } + case 'u': + opt = D_UNIFIED; + context = 3; + break; + case 'U': { + char *x; + + opt = D_UNIFIED; + context = strtol(optarg, &x, 10); + if (*x != '\0' || *optarg == '+' || *optarg == '-') { + fprintf(stderr, "%s: use -U num\n", progname); + done(); + } + 
break; + } + case 'h': + hflag++; + break; + case 'S': + start = optarg; + break; + case 'r': + rflag++; + break; + case 's': + sflag++; + break; + case 'l': + lflag++; + break; + case 'N': + Nflag |= 3; + break; + case '1': + Nflag |= 1; + break; + case '2': + Nflag |= 2; + break; + case 'x': + xadd(optarg); + break; + case 'a': + aflag++; + break; + case 'p': + pflag++; + break; + case 'X': + Xadd(optarg); + break; + default: + if (invalid == 0 && !sysv3) + invalid = optopt; + } + } + argv += optind, argc -= optind; + if (argc != 2) { + fprintf(stderr, sysv3 ? "%s: arg count\n" : + "%s: two filename arguments required\n", + progname); + done(); + } + file1 = argv[0]; + file2 = argv[1]; + if (invalid) { + fprintf(stderr, "%s: invalid option -%c\n", progname, invalid); + usage(); + } + if (pflag) { + if (opt == D_UNIFIED || opt == D_CONTEXT) + /*EMPTY*/; + else if (opt == 0) { + opt = D_CONTEXT; + context = 3; + } else { + fprintf(stderr, + "%s: -p doesn't support -e, -f, -n, or -I\n", + progname); + done(); + } + } + if (hflag && opt) { + fprintf(stderr, + "%s: -h doesn't support -e, -f, -n, -c, -u, or -I\n", + progname); + done(); + } + diffany(argv); + /*NOTREACHED*/ + return 0; +} + +void +diffany(char **argv) +{ + if (!strcmp(file1, "-")) + stb1.st_mode = S_IFREG; + else if (stat(file1, &stb1) < 0) { + if (sysv3) + stb1.st_mode = S_IFREG; + else { + fprintf(stderr, "%s: %s: %s\n", progname, file1, + strerror(errno)); + done(); + } + } + if (!strcmp(file2, "-")) + stb2.st_mode = S_IFREG; + else if (stat(file2, &stb2) < 0) { + if (sysv3) + stb2.st_mode = S_IFREG; + else { + fprintf(stderr, "%s: %s: %s\n", progname, file2, + strerror(errno)); + done(); + } + } + if ((stb1.st_mode & S_IFMT) == S_IFDIR && + (stb2.st_mode & S_IFMT) == S_IFDIR) { + diffdir(argv); + } else + diffreg(); + done(); +} + +static void +usage(void) +{ + fprintf(stderr, "\ +usage: %s [ -bcefhilnrstw -C num -D string -S name ] file1 file2\n", + progname); + done(); +} + +int +min(int a,int b) 
+{ + + return (a < b ? a : b); +} + +int +max(int a,int b) +{ + + return (a > b ? a : b); +} + +void +done(void) +{ + if (tempfile1) { + unlink(tempfile1); + tempfile1 = NULL; + } + if (tempfile2) { + unlink(tempfile2); + tempfile2 = NULL; + } + if (recdepth == 0) + exit(status); + else + longjmp(recenv, 1); +} + +static void noroom(void); + +void * +dalloc(size_t n) +{ + struct stackblk *sp; + + if ((sp = malloc(n + sizeof *sp)) != NULL) { + sp->s_prev = NULL; + sp->s_next = curstack; + if (curstack) + curstack->s_prev = sp; + curstack = sp; + return (char *)sp + sizeof *sp; + } else + return NULL; +} + +void * +talloc(size_t n) +{ + register void *p; + + if ((p = dalloc(n)) == NULL) + noroom(); + return p; +} + +void * +ralloc(void *p,size_t n) +{ + struct stackblk *sp, *sq; + + if (p == NULL) + return talloc(n); + sp = (struct stackblk *)((char *)p - sizeof *sp); + if ((sq = realloc(sp, n + sizeof *sp)) == NULL) + noroom(); + if (sq->s_prev) + sq->s_prev->s_next = sq; + if (sq->s_next) + sq->s_next->s_prev = sq; + if (curstack == sp) + curstack = sq; + return (char *)sq + sizeof *sq; +} + +void +tfree(void *p) +{ + struct stackblk *sp; + + if (p == NULL) + return; + sp = (struct stackblk *)((char *)p - sizeof *sp); + if (sp->s_prev) + sp->s_prev->s_next = sp->s_next; + if (sp->s_next) + sp->s_next->s_prev = sp->s_prev; + if (sp == curstack) + curstack = sp->s_next; + free(sp); +} + +void +purgestack(void) +{ + struct stackblk *sp = curstack, *sq = NULL; + + do { + free(sq); + sq = sp; + if (sp) + sp = sp->s_next; + } while (sq); +} + +static void +noroom(void) +{ + oomsg(": files too big, try -h\n"); + status = 2; + done(); +} + +static void +xadd(const char *cp) +{ + struct xclusion *xp; + + xp = talloc(sizeof *xp); + xp->x_pat = cp; + xp->x_nxt = xflag; + xflag = xp; +} + +static void +Xadd(const char *name) +{ + struct iblok *ip; + char *line = NULL; + size_t size = 0, len; + + if (name[0] == '-' && name[1] == '\0') + ip = ib_alloc(0, 0); + else + ip = 
ib_open(name, 0); + if (ip == NULL) { + fprintf(stderr, "%s: -X %s: %s\n", progname, name, + strerror(errno)); + done(); + } + while ((len = ib_getlin(ip, &line, &size, realloc)) != 0) { + if (line[len-1] == '\n') + line[--len] = '\0'; + xadd(line); + line = NULL; + size = 0; + } + free(line); + if (ip->ib_fd) + ib_close(ip); + else + ib_free(ip); +} + +void +oomsg(const char *s) +{ + write(2, progname, strlen(progname)); + write(2, s, strlen(s)); +} diff --git a/diff/diff.h b/diff/diff.h @@ -0,0 +1,211 @@ +/* + * This code contains changes by + * Gunnar Ritter, Freiburg i. Br., Germany, March 2003. All rights reserved. + * + * Conditions 1, 2, and 4 and the no-warranty notice below apply + * to these changes. + * + * + * Copyright (c) 1991 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. 
AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Sccsid @(#)diff.h 1.15 (gritter) 3/26/05> */ +/* from 4.3BSD diff.h 4.7 85/08/16 */ + +/* + * diff - common declarations + */ + +#include <stdio.h> +#include <ctype.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <dirent.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <libgen.h> +#include <errno.h> +#include <setjmp.h> + +#if defined (__GLIBC__) +#if defined (_IO_getc_unlocked) +#undef getc +#define getc(f) _IO_getc_unlocked(f) +#endif +#if defined (_IO_putc_unlocked) +#undef putc +#define putc(c, f) _IO_putc_unlocked(c, f) +#undef putchar +#define putchar(c) _IO_putc_unlocked(c, stdout) +#endif +#endif + +/* + * Output format options + */ +int opt; + +#define D_NORMAL 0 /* Normal output */ +#define D_EDIT -1 /* Editor script out */ +#define D_REVERSE 1 /* Reverse editor script */ +#define D_CONTEXT 2 /* Diff with context */ +#define D_IFDEF 3 /* Diff with merged #ifdef's */ +#define D_NREVERSE 4 /* Reverse ed script with numbered + lines and no trailing . 
*/ +#define D_UNIFIED 5 /* Unified diff */ + +int aflag; /* diff binary files */ +int tflag; /* expand tabs on output */ +int pflag; /* show surrounding C function */ + +/* + * Algorithm related options + */ +int hflag; /* -h, use halfhearted DIFFH */ +int bflag; /* ignore blanks in comparisons */ +int wflag; /* totally ignore blanks in comparisons */ +int iflag; /* ignore case in comparisons */ +int Bflag; /* ignore changes that consist of blank lines */ + +/* + * Options on hierarchical diffs. + */ +int lflag; /* long output format with header */ +int rflag; /* recursively trace directories */ +int sflag; /* announce files which are same */ +int Nflag; /* write text of nonexistent files */ +const char *start; /* do file only if name >= this */ + +struct xclusion { + struct xclusion *x_nxt; + const char *x_pat; +} *xflag; /* patterns to exclude from comparison */ + +/* + * Variables for -I D_IFDEF option. + */ +int wantelses; /* -E */ +char *ifdef1; /* String for -1 */ +char *ifdef2; /* String for -2 */ +char *endifname; /* What we will print on next #endif */ +int inifdef; + +/* + * Variables for -c context option. + */ +int context; /* lines of context to be printed */ + +/* + * State for exit status. + */ +int status; +int anychange; +char *tempfile1; /* used when comparing against std input */ +char *tempfile2; /* used when comparing against std input */ + +/* + * Variables for diffdir. + */ +char **diffargv; /* option list to pass to recursive diffs */ +int recdepth; /* recursion depth */ +jmp_buf recenv; /* jump stack on error */ + +struct stackblk { + struct stackblk *s_prev; + struct stackblk *s_next; +} *curstack; + +/* + * Input file names. + * With diffdir, file1 and file2 are allocated extra space (see setfile()), + * and padded with a '/', and then efile1 and efile2 point after + * the '/'. 
+ */ +char *file1, *file2, *efile1, *efile2; +struct stat stb1, stb2; + +extern const char diffh[], diff[], pr[]; +extern const char *argv0; +extern const char *progname; +int mb_cur_max; +extern int sysv3; + +/* diff.c */ +void diffany(char **); +int min(int, int); +int max(int, int); +void done(void); +void *dalloc(size_t); +void *talloc(size_t); +void *ralloc(void *, size_t); +void tfree(void *); +void purgestack(void); +void oomsg(const char *); +/* diffdir.c */ +void diffdir(char **); +int ascii(int); +/* diffreg.c */ +void diffreg(void); diff --git a/diff/diffdir.c b/diff/diffdir.c @@ -0,0 +1,993 @@ +/* + * This code contains changes by + * Gunnar Ritter, Freiburg i. Br., Germany, March 2003. All rights reserved. + * + * Conditions 1, 2, and 4 and the no-warranty notice below apply + * to these changes. + * + * + * Copyright (c) 1991 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. 
AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Sccsid @(#)diffdir.c 1.30 (gritter) 1/22/06> */ +/* from 4.3BSD diffdir.c 4.9 (Berkeley) 8/28/84 */ + +#include "diff.h" +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <signal.h> +#include "sigset.h" +#include "pathconf.h" + +#ifdef __GLIBC__ /* old glibcs don't know _XOPEN_SOURCE=600L yet */ +#ifndef S_IFSOCK +#ifdef __S_IFSOCK +#define S_IFSOCK __S_IFSOCK +#endif /* __S_IFSOCK */ +#endif /* !S_IFSOCK */ +#endif /* __GLIBC__ */ + +/* + * diff - directory comparison + */ +#define d_flags d_ino + +#define ONLY 1 /* Only in this directory */ +#define SAME 2 /* Both places and same */ +#define DIFFER 4 /* Both places and different */ +#define DIRECT 8 /* Directory */ + +struct dir { + unsigned long long d_ino; + char *d_entry; +}; + +static int header; +static char *title, *etitle; +static size_t titlesize; +static char procself[40]; + +static void setfile(char **, char **, const char *); +static void scanpr(register struct dir *, int, const char *, const char *, + const char *, const char *, const char *); +static void only(struct dir *, int); +static struct dir *setupdir(const char *); +static int entcmp(const struct dir *, const struct dir 
*); +static void compare(struct dir *, char **); +static void calldiff(const char *, char **); +static int useless(register const char *); +static const char *mtof(mode_t mode); +static void putN(const char *, const char *, const char *, int); +static void putNreg(const char *, const char *, time_t, int); +static void putNnorm(FILE *, const char *, const char *, + FILE *, long long, int); +static void putNedit(FILE *, const char *, const char *, + FILE *, long long, int, int); +static void putNcntx(FILE *, const char *, const char *, + time_t, FILE *, long long, int); +static void putNunif(FILE *, const char *, const char *, + time_t, FILE *, long long, int); +static void putNhead(FILE *, const char *, const char *, time_t, int, + const char *, const char *); +static void putNdata(FILE *, FILE *, int, int); +static void putNdir(const char *, const char *, int); +static long long linec(const char *, FILE *); +static char *mkpath(const char *, const char *); +static void mktitle(void); +static int xclude(const char *); + +void +diffdir(char **argv) +{ + register struct dir *d1, *d2; + struct dir *dir1, *dir2; + register int i, n; + int cmp; + + if (opt == D_IFDEF) { + fprintf(stderr, "%s: can't specify -I with directories\n", + progname); + done(); + } + status = 0; + if (opt == D_EDIT && (sflag || lflag)) + fprintf(stderr, + "%s: warning: shouldn't give -s or -l with -e\n", + progname); + for (n = 6, i = 1; diffargv[i+2]; i++) + n += strlen(diffargv[i]) + 1; + if (n > titlesize) + title = ralloc(title, titlesize = n); + title[0] = 0; + strcpy(title, "diff "); + for (i = 1; diffargv[i+2]; i++) { + if (!strcmp(diffargv[i], "-")) + continue; /* was -S, dont look silly */ + strcat(title, diffargv[i]); + strcat(title, " "); + } + for (etitle = title; *etitle; etitle++) + ; + /* + * This works around a bug present in (at least) Solaris 8 and + * 9: If exec() is called with /proc/self/object/a.out, the + * process hangs. 
It is possible, though, to use the executable + * of another process. So the parent diff is used instead of the + * forked child. + */ + i = getpid(); + snprintf(procself, sizeof procself, +#if defined (__linux__) + "/proc/%d/exe", +#elif defined (__FreeBSD__) || defined (__DragonFly__) || defined (__APPLE__) + "/proc/%d/file", +#else /* !__linux__, !__FreeBSD__, !__APPLE__ */ + "/proc/%d/object/a.out", +#endif /* !__linux__, !__FreeBSD__, !__APPLE__ */ + i); + setfile(&file1, &efile1, file1); + setfile(&file2, &efile2, file2); + argv[0] = file1; + argv[1] = file2; + dir1 = setupdir(file1); + dir2 = setupdir(file2); + d1 = dir1; d2 = dir2; + while (d1->d_entry != 0 || d2->d_entry != 0) { + if (d1->d_entry && useless(d1->d_entry)) { + d1++; + continue; + } + if (d2->d_entry && useless(d2->d_entry)) { + d2++; + continue; + } + if (d1->d_entry == 0) + cmp = 1; + else if (d2->d_entry == 0) + cmp = -1; + else + cmp = strcmp(d1->d_entry, d2->d_entry); + if (cmp < 0) { + if (lflag && !(Nflag&1)) + d1->d_flags |= ONLY; + else if (Nflag&1 || opt == D_NORMAL || + opt == D_CONTEXT || opt == D_UNIFIED) + only(d1, 1); + d1++; + } else if (cmp == 0) { + compare(d1, argv); + d1++; + d2++; + } else { + if (lflag && !(Nflag&2)) + d2->d_flags |= ONLY; + else if (Nflag&2 || opt == D_NORMAL || + opt == D_CONTEXT || opt == D_UNIFIED) + only(d2, 2); + d2++; + } + } + if (lflag) { + scanpr(dir1, ONLY, "Only in %.*s", file1, efile1, 0, 0); + scanpr(dir2, ONLY, "Only in %.*s", file2, efile2, 0, 0); + scanpr(dir1, SAME, "Common identical files in %.*s and %.*s", + file1, efile1, file2, efile2); + scanpr(dir1, DIFFER, "Binary files which differ in %.*s and %.*s", + file1, efile1, file2, efile2); + scanpr(dir1, DIRECT, "Common subdirectories of %.*s and %.*s", + file1, efile1, file2, efile2); + } + if (rflag) { + if (header && lflag) + printf("\f"); + for (d1 = dir1; d1->d_entry; d1++) { + if ((d1->d_flags & DIRECT) == 0) + continue; + strcpy(efile1, d1->d_entry); + strcpy(efile2, 
d1->d_entry); + calldiff(0, argv); + } + } +} + +static void +setfile(char **fpp, char **epp, const char *file) +{ + register char *cp; + int n; + + if ((n = pathconf(file, _PC_PATH_MAX)) < 1024) + n = 1024; + *fpp = dalloc(strlen(file) + 2 + n); + if (*fpp == 0) { + oomsg(": ran out of memory\n"); + exit(1); + } + strcpy(*fpp, file); + for (cp = *fpp; *cp; cp++) + continue; + *cp++ = '/'; + *cp = '\0'; + *epp = cp; +} + +/* + * For each entry of dp whose d_flags intersects test, print its name, + * preceded once by a heading produced from the printf format title. + * The heading arguments are (precision, string) pairs for %.*s; the + * precisions are cast to int because printf requires an int there and + * a pointer difference has type ptrdiff_t. + */ +static void +scanpr(register struct dir *dp, int test, const char *title, + const char *file1, const char *efile1, + const char *file2, const char *efile2) +{ + int titled = 0; + + for (; dp->d_entry; dp++) { + if ((dp->d_flags & test) == 0) + continue; + if (titled == 0) { + /* separate successive listings by a blank line */ + if (header == 0) + header = 1; + else + printf("\n"); + printf(title, + (int)(efile1 - file1 - 1), file1, + (int)(efile2 - file2 - 1), file2); + printf(":\n"); + titled = 1; + } + printf("\t%s\n", dp->d_entry); + } +} + +static void +only(struct dir *dp, int which) +{ + char *file = which == 1 ? file1 : file2; + char *other = which == 1 ? file2 : file1; + char *efile = which == 1 ? efile1 : efile2; + char *eother = which == 1 ? 
efile2 : efile1; + + if (Nflag&which) { + char c = file[efile - file - 1]; + char d = other[eother - other - 1]; + file[efile - file - 1] = '\0'; + other[eother - other - 1] = '\0'; + putN(file, other, dp->d_entry, which); + file[efile - file - 1] = c; + other[eother - other - 1] = d; + } else + printf("Only in %.*s: %s\n", (int)(efile - file - 1), file, + dp->d_entry); + status = 1; +} + +static struct dir * +setupdir(const char *cp) +{ + register struct dir *dp = 0, *ep; + register struct dirent *rp; + register int nitems; + DIR *dirp; + + dirp = opendir(cp); + if (dirp == NULL) { + fprintf(stderr, "%s: %s: %s\n", progname, cp, strerror(errno)); + done(); + } + nitems = 0; + dp = dalloc(sizeof (struct dir)); + if (dp == 0) { + oomsg(": ran out of memory\n"); + status = 2; + done(); + } + while (rp = readdir(dirp)) { + if (xflag && xclude(rp->d_name)) + continue; + ep = &dp[nitems++]; + ep->d_entry = 0; + ep->d_flags = 0; + ep->d_entry = dalloc(strlen(rp->d_name) + 1); + if (ep->d_entry == 0) { + oomsg(": out of memory\n"); + status = 2; + done(); + } + strcpy(ep->d_entry, rp->d_name); + dp = ralloc(dp, (nitems + 1) * sizeof (struct dir)); + } + dp[nitems].d_entry = 0; /* delimiter */ + closedir(dirp); + qsort(dp, nitems, sizeof (struct dir), + (int (*)(const void *, const void *))entcmp); + return (dp); +} + +static int +entcmp(const struct dir *d1, const struct dir *d2) +{ + return (strcmp(d1->d_entry, d2->d_entry)); +} + +static void +compare(struct dir *dp, char **argv) +{ + register int i, j; + int f1 = -1, f2 = -1; + mode_t fmt1, fmt2; + struct stat stb1, stb2; + char buf1[BUFSIZ], buf2[BUFSIZ]; + + strcpy(efile1, dp->d_entry); + strcpy(efile2, dp->d_entry); + if (stat(file1, &stb1) < 0 || (fmt1 = stb1.st_mode&S_IFMT) == S_IFREG && + (f1 = open(file1, O_RDONLY)) < 0) { + perror(file1); + status = 2; + return; + } + if (stat(file2, &stb2) < 0 || (fmt2 = stb2.st_mode&S_IFMT) == S_IFREG && + (f2 = open(file2, O_RDONLY)) < 0) { + perror(file2); + close(f1); + 
status = 2; + return; + } + if (fmt1 != S_IFREG || fmt2 != S_IFREG) { + if (fmt1 == fmt2) { + switch (fmt1) { + case S_IFDIR: + dp->d_flags = DIRECT; + if (lflag || opt == D_EDIT) + goto closem; + if (opt != D_UNIFIED) + printf("Common subdirectories: " + "%s and %s\n", + file1, file2); + goto closem; + case S_IFBLK: + case S_IFCHR: + if (stb1.st_rdev == stb2.st_rdev) + goto same; + printf("Special files %s and %s differ\n", + file1, file2); + break; + case S_IFIFO: + if (stb1.st_dev == stb2.st_dev && + stb1.st_ino == stb2.st_ino) + goto same; + printf("Named pipes %s and %s differ\n", + file1, file2); + break; + default: + printf("Don't know how to compare " + "%ss %s and %s\n", + mtof(fmt1), file1, file2); + } + } else + printf("File %s is a %s while file %s is a %s\n", + file1, mtof(fmt1), file2, mtof(fmt2)); + if (lflag) + dp->d_flags |= DIFFER; + status = 1; + goto closem; + } + if (stb1.st_size != stb2.st_size) + goto notsame; + if (stb1.st_dev == stb2.st_dev && stb1.st_ino == stb2.st_ino) + goto same; + for (;;) { + i = read(f1, buf1, BUFSIZ); + j = read(f2, buf2, BUFSIZ); + if (i < 0 || j < 0 || i != j) + goto notsame; + if (i == 0 && j == 0) + goto same; + for (j = 0; j < i; j++) + if (buf1[j] != buf2[j]) + goto notsame; + } +same: + if (sflag == 0) + goto closem; + if (lflag) + dp->d_flags = SAME; + else + printf("Files %s and %s are identical\n", file1, file2); + goto closem; +notsame: + if (!aflag && (!ascii(f1) || !ascii(f2))) { + if (lflag) + dp->d_flags |= DIFFER; + else if (opt == D_NORMAL || opt == D_CONTEXT || + opt == D_UNIFIED) + printf("Binary files %s and %s differ\n", + file1, file2); + status = 1; + goto closem; + } + close(f1); close(f2); + anychange = 1; + if (lflag) + calldiff(title, argv); + else { + if (opt == D_EDIT) { + printf("ed - %s << '-*-END-*-'\n", dp->d_entry); + calldiff(0, argv); + } else { + printf("%s%s %s\n", title, file1, file2); + calldiff(0, argv); + } + if (opt == D_EDIT) + printf("w\nq\n-*-END-*-\n"); + } + return; 
+closem: + close(f1); close(f2); +} + +static void +stackdiff(char **argv) +{ + int oanychange; + char *ofile1, *ofile2, *oefile1, *oefile2; + struct stat ostb1, ostb2; + struct stackblk *ocurstack; + char *oargv[2]; + int oheader; + char *otitle, *oetitle; + size_t otitlesize; + jmp_buf orecenv; + + (void)&oargv; + recdepth++; + oanychange = anychange; + ofile1 = file1; + ofile2 = file2; + oefile1 = efile1; + oefile2 = efile2; + ostb1 = stb1; + ostb2 = stb2; + ocurstack = curstack; + oargv[0] = argv[0]; + oargv[1] = argv[1]; + oheader = header; + otitle = title; + oetitle = etitle; + otitlesize = titlesize; + memcpy(orecenv, recenv, sizeof orecenv); + + anychange = 0; + file1 = argv[0]; + file2 = argv[1]; + efile1 = NULL; + efile2 = NULL; + curstack = NULL; + header = 0; + title = NULL; + etitle = NULL; + titlesize = 0; + + if (setjmp(recenv) == 0) + diffany(argv); + purgestack(); + + anychange = oanychange; + file1 = ofile1; + file2 = ofile2; + efile1 = oefile1; + efile2 = oefile2; + stb1 = ostb1; + stb2 = ostb2; + curstack = ocurstack; + argv[0] = oargv[0]; + argv[1] = oargv[1]; + header = oheader; + title = otitle; + etitle = oetitle; + titlesize = otitlesize; + memcpy(recenv, orecenv, sizeof recenv); + recdepth--; +} + +static const char *prargs[] = { "pr", "-h", 0, "-f", 0, 0 }; + +static void +calldiff(const char *wantpr, char **argv) +{ + int pid, cstatus, cstatus2, pv[2]; + + if (wantpr == NULL && hflag == 0) { + stackdiff(argv); + return; + } + prargs[2] = wantpr; + fflush(stdout); + if (wantpr) { + mktitle(); + pipe(pv); + pid = fork(); + if (pid == -1) { + fprintf(stderr, "No more processes\n"); + done(); + } + if (pid == 0) { + close(0); + dup(pv[0]); + close(pv[0]); + close(pv[1]); + execvp(pr, (char **)prargs); + perror(pr); + done(); + } + } + pid = fork(); + if (pid == -1) { + fprintf(stderr, "%s: No more processes\n", progname); + done(); + } + if (pid == 0) { + if (wantpr) { + close(1); + dup(pv[1]); + close(pv[0]); + close(pv[1]); + } + 
execv(procself, diffargv); + execv(argv0, diffargv); + execvp(diff, diffargv); + perror(diff); + done(); + } + if (wantpr) { + close(pv[0]); + close(pv[1]); + } + while (wait(&cstatus) != pid) + continue; + if (cstatus != 0) { + if (WIFEXITED(cstatus) && WEXITSTATUS(cstatus) == 1) + status = 1; + else + status = 2; + } + while (wait(&cstatus2) != -1) + continue; +/* + if ((status >> 8) >= 2) + done(); +*/ +} + +int +ascii(int f) +{ + char buf[BUFSIZ]; + register int cnt; + register char *cp; + + lseek(f, 0, 0); + cnt = read(f, buf, BUFSIZ); + cp = buf; + while (--cnt >= 0) + if (*cp++ == '\0') + return (0); + return (1); +} + +/* + * THIS IS CRUDE. + */ +static int +useless(register const char *cp) +{ + + if (cp[0] == '.') { + if (cp[1] == '\0') + return (1); /* directory "." */ + if (cp[1] == '.' && cp[2] == '\0') + return (1); /* directory ".." */ + } + if (start && strcmp(start, cp) > 0) + return (1); + return (0); +} + +static const char * +mtof(mode_t mode) +{ + switch (mode) { + case S_IFDIR: + return "directory"; + case S_IFCHR: + return "character special file"; + case S_IFBLK: + return "block special file"; + case S_IFREG: + return "plain file"; + case S_IFIFO: + return "named pipe"; +#ifdef S_IFSOCK + case S_IFSOCK: + return "socket"; +#endif /* S_IFSOCK */ + default: + return "unknown type"; + } +} + +static void +putN(const char *dir, const char *odir, const char *file, int which) +{ + struct stat st; + char *path; + char *opath; + + path = mkpath(dir, file); + opath = mkpath(odir, file); + if (stat(path, &st) < 0) { + fprintf(stderr, "%s: %s: %s\n", progname, path, + strerror(errno)); + status = 2; + goto out; + } + switch (st.st_mode & S_IFMT) { + case S_IFREG: + putNreg(path, opath, st.st_mtime, which); + break; + case S_IFDIR: + putNdir(path, opath, which); + break; + default: + printf("Only in %s: %s\n", dir, file); + } +out: tfree(path); + tfree(opath); +} + +static void +putNreg(const char *fn, const char *on, time_t mtime, int which) +{ + long 
long lines; + FILE *fp; + FILE *op; + void (*opipe)(int) = SIG_DFL; + pid_t pid = 0; + + if ((fp = fopen(fn, "r")) == NULL) { + fprintf(stderr, "%s: %s: %s\n", progname, fn, strerror(errno)); + status = 2; + return; + } + if ((lines = linec(fn, fp)) == 0 || fseek(fp, 0, SEEK_SET) != 0) + goto out; + if (lflag) { + int pv[2]; + opipe = sigset(SIGPIPE, SIG_IGN); + fflush(stdout); + prargs[2] = title; + pipe(pv); + switch (pid = fork()) { + case -1: + fprintf(stderr, "No more processes\n"); + done(); + /*NOTREACHED*/ + case 0: + close(0); + dup(pv[0]); + close(pv[0]); + close(pv[1]); + execvp(pr, (char **)prargs); + perror(pr); + done(); + } + close(pv[0]); + op = fdopen(pv[1], "w"); + } else + op = stdout; + fprintf(op, "%.*s %s %s\n", (int)(etitle - title - 1), title, + which == 1 ? fn : on, + which == 1 ? on : fn); + switch (opt) { + case D_NORMAL: + putNnorm(op, fn, on, fp, lines, which); + break; + case D_EDIT: + putNedit(op, fn, on, fp, lines, which, 0); + break; + case D_REVERSE: + putNedit(op, fn, on, fp, lines, which, 1); + break; + case D_CONTEXT: + putNcntx(op, fn, on, mtime, fp, lines, which); + break; + case D_NREVERSE: + putNedit(op, fn, on, fp, lines, which, 2); + break; + case D_UNIFIED: + putNunif(op, fn, on, mtime, fp, lines, which); + break; + } + if (lflag) { + fclose(op); + while (wait(NULL) != pid); + sigset(SIGPIPE, opipe); + } +out: fclose(fp); +} + +static void +putNnorm(FILE *op, const char *fn, const char *on, + FILE *fp, long long lines, int which) +{ + int pfx; + + if (which == 1) { + fprintf(op, "1,%lldd0\n", lines); + pfx = '<'; + } else { + fprintf(op, "0a1,%lld\n", lines); + pfx = '>'; + } + putNdata(op, fp, pfx, ' '); +} + +static void +putNedit(FILE *op, const char *fn, const char *on, + FILE *fp, long long lines, int which, int reverse) +{ + switch (reverse) { + case 0: + if (which == 1) + fprintf(op, "1,%lldd\n", lines); + else { + fprintf(op, "0a\n"); + putNdata(op, fp, 0, 0); + fprintf(op, ".\n"); + } + break; + case 1: + if 
(which == 1) + fprintf(op, "d1 %lld\n", lines); + else { + fprintf(op, "a0\n"); + putNdata(op, fp, 0, 0); + fprintf(op, ".\n"); + } + break; + case 2: + if (which == 1) + fprintf(op, "d1 %lld\n", lines); + else { + fprintf(op, "a0 %lld\n", lines); + putNdata(op, fp, 0, 0); + } + break; + } +} + +static void +putNcntx(FILE *op, const char *fn, const char *on, time_t mtime, + FILE *fp, long long lines, int which) +{ + putNhead(op, fn, on, mtime, which, "***", "---"); + fprintf(op, "***************\n*** "); + if (which == 1) + fprintf(op, "1,%lld", lines); + else + putc('0', op); + fprintf(op, " ****\n"); + if (which != 1) + fprintf(op, "--- 1,%lld ----\n", lines); + putNdata(op, fp, which == 1 ? '-' : '+', ' '); + if (which == 1) + fprintf(op, "--- 0 ----\n"); +} + +static void +putNunif(FILE *op, const char *fn, const char *on, time_t mtime, + FILE *fp, long long lines, int which) +{ + putNhead(op, fn, on, mtime, which, "---", "+++"); + fprintf(op, "@@ "); + fprintf(op, which == 1 ? "-1,%lld +0,0" : "-0,0 +1,%lld", lines); + fprintf(op, " @@\n"); + putNdata(op, fp, which == 1 ? '-' : '+', 0); +} + +static void +putNhead(FILE *op, const char *fn, const char *on, time_t mtime, int which, + const char *p1, const char *p2) +{ + time_t t1, t2; + const char *f1, *f2; + + t1 = which == 1 ? mtime : 0; + t2 = which == 1 ? 0 : mtime; + f1 = which == 1 ? fn : on; + f2 = which == 1 ? 
on : fn; + fprintf(op, "%s %s\t%s", p1, f1, ctime(&t1)); + fprintf(op, "%s %s\t%s", p2, f2, ctime(&t2)); +} + +static void +putNdata(FILE *op, FILE *fp, int pfx, int sec) +{ + int c, lastc = '\n', col = 0; + + while ((c = getc(fp)) != EOF) { + if (lastc == '\n') { + col = 0; + if (pfx) + putc(pfx, op); + if (sec) + putc(sec, op); + } + if (c == '\t' && tflag) { + do + putc(' ', op); + while (++col & 7); + } else { + putc(c, op); + col++; + } + lastc = c; + } + if (lastc != '\n') { + if (aflag) + fprintf(op, "\n\\ No newline at end of file\n"); + else + putc('\n', op); + } +} + +static void +putNdir(const char *fn, const char *on, int which) +{ + DIR *Dp; + struct dirent *dp; + + if ((Dp = opendir(fn)) == NULL) { + fprintf(stderr, "%s: %s: %s\n", progname, fn, strerror(errno)); + status = 2; + return; + } + while ((dp = readdir(Dp)) != NULL) { + if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' || + dp->d_name[1] == '.' && + dp->d_name[2] == '\0')) + continue; + if (xflag && xclude(dp->d_name)) + continue; + putN(fn, on, dp->d_name, which); + } + closedir(Dp); +} + +static long long +linec(const char *fn, FILE *fp) +{ + int c, lastc = '\n'; + long long cnt = 0; + + while ((c = getc(fp)) != EOF) { + if (c == '\n') + cnt++; + lastc = c; + } + if (lastc != '\n') { + if (!aflag) + fprintf(stderr, + "Warning: missing newline at end of file %s\n", + fn); + cnt++; + } + return cnt; +} + +static char * +mkpath(const char *dir, const char *file) +{ + char *path, *pp; + const char *cp; + + pp = path = talloc(strlen(dir) + strlen(file) + 2); + for (cp = dir; *cp; cp++) + *pp++ = *cp; + if (pp > path && pp[-1] != '/') + *pp++ = '/'; + for (cp = file; *cp; cp++) + *pp++ = *cp; + *pp = '\0'; + return path; +} + +/* + * Append "file1 file2" to the title at etitle. The title buffer is + * grown first whenever it cannot hold the existing prefix, both names, + * the separating blank and the terminating NUL. ralloc() is used as a + * realloc-style call, as elsewhere in this file, so the buffer may + * move: etitle is recomputed, and prargs[2] is repointed if it referred + * to the old buffer, so that pr is not handed a stale heading pointer. + */ +static void +mktitle(void) +{ + size_t off = etitle - title; + size_t need = off + strlen(file1) + strlen(file2) + 2; + int repoint = prargs[2] == title; + + if (need > titlesize) { + titlesize = need; + title = ralloc(title, titlesize); + etitle = &title[off]; + if (repoint) + prargs[2] = title; + } + sprintf(etitle, "%s %s", file1, file2); +} + 
+static int +xclude(const char *fn) +{ + extern int gmatch(const char *, const char *); + struct xclusion *xp; + + for (xp = xflag; xp; xp = xp->x_nxt) + if (gmatch(fn, xp->x_pat)) + return 1; + return 0; +} diff --git a/diff/diffh.c b/diff/diffh.c @@ -0,0 +1,410 @@ +/* + * This code contains changes by + * Gunnar Ritter, Freiburg i. Br., Germany, March 2003. All rights reserved. + * + * Conditions 1, 2, and 4 and the no-warranty notice below apply + * to these changes. + * + * + * Copyright (c) 1991 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)diffh.sl 1.11 (gritter) 5/29/05"; + +/* from 4.3BSD diffh.c 4.4 11/27/85> */ + +#include <stdio.h> +#include <ctype.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <unistd.h> +#include <locale.h> +#include <wchar.h> +#include <wctype.h> + +#include <iblok.h> +#include <mbtowi.h> + +#define C 3 +#define RANGE 30 +#define INF 16384 + +#define next(wc, s, n) (*(s) & 0200 ? ((n) = mbtowi(&(wc), (s), mb_cur_max), \ + (n) = ((n) > 0 ? (n) : (n) < 0 ? (wc=WEOF, 1) : 1)) :\ + ((wc) = *(s) & 0377, (n) = 1)) + +static char *text[2][RANGE]; +static size_t size[2][RANGE]; +static long long lineno[2] = {1, 1}; /*no. 
of 1st stored line in each file*/ +static int ntext[2]; /*number of stored lines in each*/ +static long long n0,n1; /*scan pointer in each*/ +static int bflag; +static int mb_cur_max; +static int debug = 0; +static struct iblok *file[2]; +static int eof[2]; + +static char *getl(int, long long); +static void clrl(int, long long); +static void movstr(int, int, int); +static int easysynch(void); +static int output(int, int); +static void change(long long, int, long long, int, const char *); +static void range(long long, int); +static int cmp(const char *, const char *); +static struct iblok *dopen(const char *, const char *); +static void progerr(const char *); +static void error(const char *, const char *); +static int hardsynch(void); +static void *lrealloc(void *, size_t); + + /* return pointer to line n of file f*/ +static char * +getl(int f, long long n) +{ + register int delta, nt; + size_t len; + + delta = n - lineno[f]; + nt = ntext[f]; + if(delta<0) + progerr("1"); + if(delta<nt) + return(text[f][delta]); + if(delta>nt) + progerr("2"); + if(nt>=RANGE) + progerr("3"); + if(eof[f]) + return(NULL); + len = ib_getlin(file[f], &text[f][nt], &size[f][nt], lrealloc); + if (len != 0) { + ntext[f]++; + return(text[f][nt]); + } else { + eof[f]++; + return NULL; + } +} + + /*remove thru line n of file f from storage*/ +static void +clrl(int f,long long n) +{ + register long long i,j; + j = n-lineno[f]+1; + for(i=0;i+j<ntext[f];i++) + movstr(f, i+j, i); + lineno[f] = n+1; + ntext[f] -= j; +} + +static void +movstr(register int f, register int i, register int j) +{ + free(text[f][j]); + text[f][j] = text[f][i]; + size[f][j] = size[f][i]; + text[f][i] = 0; + size[f][i] = 0; +} + +int +main(int argc,char **argv) +{ + char *s0,*s1; + register int c, status = 0; + + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + while((c=getopt(argc,argv,":b")) != EOF) { + switch (c) { + case 'b': + bflag++; + break; + } + } + if(argc-optind!=2) + error("must have 2 file 
arguments",""); + file[0] = dopen(argv[optind],argv[optind+1]); + file[1] = dopen(argv[optind+1],argv[optind]); + for(;;) { + s0 = getl(0,++n0); + s1 = getl(1,++n1); + if(s0==NULL||s1==NULL) + break; + if(cmp(s0,s1)!=0) { + if(!easysynch()&&!hardsynch()) + progerr("5"); + status = 1; + } else { + clrl(0,n0); + clrl(1,n1); + } + } + if(s0==NULL&&s1==NULL) + exit(status); + if(s0==NULL) + output(-1,INF); + if(s1==NULL) + output(INF,-1); + return (1); +} + + /* synch on C successive matches*/ +static int +easysynch(void) +{ + int i,j; + register int k,m; + char *s0,*s1; + for(i=j=1;i<RANGE&&j<RANGE;i++,j++) { + s0 = getl(0,n0+i); + if(s0==NULL) + return(output(INF,INF)); + for(k=C-1;k<j;k++) { + for(m=0;m<C;m++) + if(cmp(getl(0,n0+i-m), + getl(1,n1+k-m))!=0) + goto cont1; + return(output(i-C,k-C)); +cont1: ; + } + s1 = getl(1,n1+j); + if(s1==NULL) + return(output(INF,INF)); + for(k=C-1;k<=i;k++) { + for(m=0;m<C;m++) + if(cmp(getl(0,n0+k-m), + getl(1,n1+j-m))!=0) + goto cont2; + return(output(k-C,j-C)); +cont2: ; + } + } + return(0); +} + +static int +output(int a,int b) +{ + register int i; + char *s; + if(a<0) + change(n0-1,0,n1,b,"a"); + else if(b<0) + change(n0,a,n1-1,0,"d"); + else + change(n0,a,n1,b,"c"); + for(i=0;i<=a;i++) { + s = getl(0,n0+i); + if(s==NULL) + break; + printf("< %s",s); + clrl(0,n0+i); + } + n0 += i-1; + if(a>=0&&b>=0) + printf("---\n"); + for(i=0;i<=b;i++) { + s = getl(1,n1+i); + if(s==NULL) + break; + printf("> %s",s); + clrl(1,n1+i); + } + n1 += i-1; + return(1); +} + +static void +change(long long a,int b,long long c,int d,const char *s) +{ + range(a,b); + printf("%s",s); + range(c,d); + printf("\n"); +} + +static void +range(long long a,int b) +{ + if(b==INF) + printf("%lld,$",a); + else if(b==0) + printf("%lld",a); + else + printf("%lld,%lld",a,a+b); +} + +static int +cmp(const char *s,const char *t) +{ + if(debug) + printf("%s:%s\n",s,t); + for(;;){ + if(bflag) { + if (mb_cur_max > 1) { + wint_t wc, wd; + int n, m; + + if (next(wc, s, 
n), next(wd, t, m), + iswspace(wc) && iswspace(wd)) { + while (s += n, next(wc, s, n), + iswspace(wc)); + while (t += m, next(wd, t, m), + iswspace(wd)); + } + } else { + if (isspace(*s)&&isspace(*t)) { + while(isspace(*++s)) ; + while(isspace(*++t)) ; + } + } + } + if(*s!=*t||*s==0) + break; + s++; + t++; + } + return((*s&0377)-(*t&0377)); +} + +static struct iblok * +dopen(const char *f1,const char *f2) +{ + struct iblok *ip; + char *b=0,*bptr; + const char *eptr; + struct stat statbuf; + if(cmp(f1,"-")==0) + if(cmp(f2,"-")==0) + error("can't do - -",""); + else + return(ib_alloc(0, 0)); + if(stat(f1,&statbuf)==-1) + error("can't access ",f1); + if((statbuf.st_mode&S_IFMT)==S_IFDIR) { + b = lrealloc(0, strlen(f1) + strlen(f2) + 2); + for(bptr=b;*bptr= *f1++;bptr++) ; + *bptr++ = '/'; + for(eptr=f2;*eptr;eptr++) + if(*eptr=='/'&&eptr[1]!=0&&eptr[1]!='/') + f2 = eptr+1; + while(*bptr++= *f2++) ; + f1 = b; + } + ip = ib_open(f1,0); + if(ip==NULL) + error("can't open",f1); + if (b) + free(b); + return(ip); +} + +static void +progerr(const char *s) +{ + error("program error ",s); +} + +static void +error(const char *s,const char *t) +{ + fprintf(stderr,"diffh: %s%s\n",s,t); + exit(2); +} + + /*stub for resychronization beyond limits of text buf*/ +static int +hardsynch(void) +{ + change(n0,INF,n1,INF,"c"); + printf("---change record omitted\n"); + error("can't resynchronize",""); + return(0); +} + +static void * +lrealloc(void *op, size_t size) +{ + void *np; + + if ((np = realloc(op, size)) == NULL) { + write(2, "diffh: line too long\n", 21); + _exit(1); + } + return np; +} diff --git a/diff/diffreg.c b/diff/diffreg.c @@ -0,0 +1,1629 @@ +/* + * This code contains changes by + * Gunnar Ritter, Freiburg i. Br., Germany, March 2003. All rights reserved. + * + * Conditions 1, 2, and 4 and the no-warranty notice below apply + * to these changes. + * + * + * Copyright (c) 1991 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* Sccsid @(#)diffreg.c 1.30 (gritter) 3/15/07> */ +/* from 4.3BSD diffreg.c 4.16 3/29/86 */ + +#include "diff.h" +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <signal.h> +#include "sigset.h" +#include <wchar.h> +#include <wctype.h> +#include <inttypes.h> +#include "mbtowi.h" +/* + * diff - compare two files. + */ + +/* + * Uses an algorithm due to Harold Stone, which finds + * a pair of longest identical subsequences in the two + * files. + * + * The major goal is to generate the match vector J. + * J[i] is the index of the line in file1 corresponding + * to line i file0. J[i] = 0 if there is no + * such line in file1. + * + * Lines are hashed so as to work in core. All potential + * matches are located by sorting the lines of each file + * on the hash (called ``value''). In particular, this + * collects the equivalence classes in file1 together. + * Subroutine equiv replaces the value of each line in + * file0 by the index of the first element of its + * matching equivalence in (the reordered) file1. + * To save space equiv squeezes file1 into a single + * array member in which the equivalence classes + * are simply concatenated, except that their first + * members are flagged by changing sign. + * + * Next the indices that point into member are unsorted into + * array class according to the original order of file0. + * + * The cleverness lies in routine stone. This marches + * through the lines of file0, developing a vector klist + * of "k-candidates". At step i a k-candidate is a matched + * pair of lines x,y (x in file0 y in file1) such that + * there is a common subsequence of length k + * between the first i lines of file0 and the first y + * lines of file1, but there is no such subsequence for + * any smaller y. x is the earliest possible mate to y + * that occurs in such a subsequence. 
+ * + * Whenever any of the members of the equivalence class of + * lines in file1 matable to a line in file0 has serial number + * less than the y of some k-candidate, that k-candidate + * with the smallest such y is replaced. The new + * k-candidate is chained (via pred) to the current + * k-1 candidate so that the actual subsequence can + * be recovered. When a member has serial number greater + * than the y of all k-candidates, the klist is extended. + * At the end, the longest subsequence is pulled out + * and placed in the array J by unravel. + * + * With J in hand, the matches there recorded are + * checked against reality to assure that no spurious + * matches have crept in due to hashing. If they have, + * they are broken, and "jackpot" is recorded--a harmless + * matter except that a true match for a spuriously + * mated line may now be unnecessarily reported as a change. + * + * Much of the complexity of the program comes simply + * from trying to minimize core utilization and + * maximize the range of doable problems by dynamically + * allocating what is needed and reusing what is not. + * The core requirements for problems larger than somewhat + * are (in words) 2*length(file0) + length(file1) + + * 3*(number of k-candidates installed), typically about + * 6n words for files of length n. 
+ */ + +#define prints(s) fputs(s,stdout) + +static FILE *input[2]; +static char mbuf[2][MB_LEN_MAX+1]; +static char *mcur[2]; +static char *mend[2]; +static int incompl[2]; + +struct cand { + long x; + long y; + long pred; +} cand; +static struct line { + long serial; + long value; +} *file[2], line; +static long len[2]; +static struct line *sfile[2];/*shortened by pruning common prefix and suffix*/ +static long slen[2]; +static long pref, suff; /* length of prefix and suffix */ +static long *class; /* will be overlaid on file[0] */ +static long *member; /* will be overlaid on file[1] */ +static long *klist; /* will be overlaid on file[0] after class */ +static struct cand *clist; /* merely a free storage pot for candidates */ +static long clen = 0; +static long *J; /* will be overlaid on class */ +static off_t *ixold; /* will be overlaid on klist */ +static off_t *ixnew; /* will be overlaid on file[1] */ +static int (*chrtran)(int);/* translation for case-folding */ +static long pstart; /* start of last search for -p */ +static long plast; /* match of last search for -p */ +static long *saveJ; /* saved J for -p */ + +/* chrtran points to one of 3 translation functions: + * cup2low if folding upper to lower case + * clow2low if not folding case + * wup2low if folding upper to lower case and MB_CUR_MAX > 1 + */ +static int +clow2low(int c) +{ + return c; +} + +static int +cup2low(int c) +{ + return tolower(c); +} + +static int +wup2low(int c) +{ + return c & ~(wchar_t)0177 ? 
towlower(c) : tolower(c); +} + +static char *copytemp(char **, const char *); +#undef splice +#define splice xxsplice +static char *splice(const char *, char *); +static void prepare(int); +static void prune(void); +static void equiv(struct line *, long, struct line *, long, long *); +static long stone(long *, long, long *, long *); +static long newcand(long, long, long); +static long search(long *, long, long); +static void unravel(long); +static void check_sb(void); +static void check_mb(void); +static void sort(struct line *, long); +static void unsort(struct line *, long, long *); +static long skipline(int); +static long wskipline(int); +static void output(void); +static void change(long, long, long, long); +static void range(long, long, const char *); +static void fetch(off_t *, long, long, FILE *, const char *, int); +static int readhash(int); +static int asciifile(FILE *); +static void dump_context_vec(void); +static void sdone(int); +static char *wcget(int, wint_t *, int *); +static void missnl(int); +static void pdump(long); + +#define notseekable(m) (((m)&S_IFMT) != S_IFREG && ((m)&S_IFMT) != S_IFBLK) + +void +diffreg(void) +{ + register long i, j; + char buf1[BUFSIZ], buf2[BUFSIZ]; + + if (hflag) { + diffargv[0] = "diffh"; + execvp(diffh, diffargv); + if (sysv3) + fprintf(stderr, "%s: cannot find diffh\n", progname); + else + fprintf(stderr, "%s: %s: %s\n", progname, diffh, + strerror(errno)); + done(); + } + chrtran = (iflag? mb_cur_max>1 ? 
wup2low : cup2low : clow2low); + if ((stb1.st_mode & S_IFMT) == S_IFDIR) { + file1 = splice(file1, file2); + if (stat(file1, &stb1) < 0) { + if (sysv3) + stb1.st_mode = S_IFREG; + else { + fprintf(stderr, "%s: %s: %s\n", progname, file1, + strerror(errno)); + done(); + } + } + } else if ((stb2.st_mode & S_IFMT) == S_IFDIR) { + file2 = splice(file2, file1); + if (stat(file2, &stb2) < 0) { + if (sysv3) + stb2.st_mode = S_IFREG; + else { + fprintf(stderr, "%s: %s: %s\n", progname, file2, + strerror(errno)); + done(); + } + } + } + if (!strcmp(file1, "-") || (notseekable(stb1.st_mode) && + strcmp(file1, "/dev/null"))) { + if (!strcmp(file2, "-")) { + fprintf(stderr, "%s: can't specify - -\n", progname); + done(); + } + file1 = copytemp(&tempfile1, file1); + if (stat(file1, &stb1) < 0) { + fprintf(stderr, "%s: %s: %s\n", progname, file1, + strerror(errno)); + done(); + } + } else if (!strcmp(file2, "-") || (notseekable(stb2.st_mode) && + strcmp(file2, "/dev/null"))) { + file2 = copytemp(&tempfile2, file2); + if (stat(file2, &stb2) < 0) { + fprintf(stderr, "%s: %s: %s\n", progname, file2, + strerror(errno)); + done(); + } + } + if ((input[0] = fopen(file1, "r")) == NULL) { + if (sysv3) + fprintf(stderr, "%s: cannot open %s\n", + progname, file1); + else + fprintf(stderr, "%s: %s: %s\n", progname, file1, + strerror(errno)); + done(); + } + mcur[0] = mend[0] = NULL; + if ((input[1] = fopen(file2, "r")) == NULL) { + if (sysv3) + fprintf(stderr, "%s: cannot open %s\n", + progname, file2); + else + fprintf(stderr, "%s: %s: %s\n", progname, file2, + strerror(errno)); + fclose(input[0]); + done(); + } + mcur[1] = mend[1] = NULL; + if (stb1.st_size != stb2.st_size) + goto notsame; + for (;;) { + i = fread(buf1, 1, BUFSIZ, input[0]); + j = fread(buf2, 1, BUFSIZ, input[1]); + if (i < 0 || j < 0 || i != j) + goto notsame; + if (i == 0 && j == 0) { + fclose(input[0]); + fclose(input[1]); + status = 0; /* files don't differ */ + goto same; + } + for (j = 0; j < i; j++) + if (buf1[j] 
!= buf2[j]) + goto notsame; + } +notsame: + /* + * Files certainly differ at this point; set status accordingly + */ + status = 1; + if (!aflag && (!asciifile(input[0]) || !asciifile(input[1]))) { + printf("Binary files %s and %s differ\n", file1, file2); + fclose(input[0]); + fclose(input[1]); + done(); + } + prepare(0); + prepare(1); + fclose(input[0]); + fclose(input[1]); + prune(); + sort(sfile[0],slen[0]); + sort(sfile[1],slen[1]); + + member = (long *)file[1]; + equiv(sfile[0], slen[0], sfile[1], slen[1], member); + member = ralloc(member,(slen[1]+2)*sizeof(*member)); + + class = (long *)file[0]; + unsort(sfile[0], slen[0], class); + class = ralloc(class,(slen[0]+2)*sizeof(*class)); + + klist = talloc((slen[0]+2)*sizeof(*klist)); + clist = talloc(sizeof(cand)); + i = stone(class, slen[0], member, klist); + tfree(member); + tfree(class); + + J = talloc((len[0]+2)*sizeof(*J)); + unravel(klist[i]); + tfree(clist); + tfree(klist); + + ixold = talloc((len[0]+2)*sizeof(*ixold)); + ixnew = talloc((len[1]+2)*sizeof(*ixnew)); + if (mb_cur_max > 1) + check_mb(); + else + check_sb(); + pstart = plast = 0; + output(); + status = anychange; +same: + if (opt == D_CONTEXT && anychange == 0) + printf("No differences encountered\n"); + done(); +} + +static char * +copytemp(char **tf, const char *fn) +{ + const char templ[] = "/tmp/dXXXXXX"; + char buf[BUFSIZ]; + register int i, f, sfd; + + if (*tf) { + unlink(*tf); + strcpy(*tf, templ); + } else { + sigset(SIGHUP,sdone); + sigset(SIGINT,sdone); + sigset(SIGPIPE,sdone); + sigset(SIGTERM,sdone); + *tf = strdup(templ); + } + f = mkstemp(*tf); + if (f < 0) { + fprintf(stderr, "%s: cannot create %s\n", progname, *tf); + done(); + } + if (strcmp(fn, "-")) { + if ((sfd = open(fn, O_RDONLY)) < 0) { + fprintf(stderr, "%s: %s: %s\n", progname, fn, + strerror(errno)); + done(); + } + } else + sfd = 0; + while ((i = read(sfd,buf,BUFSIZ)) > 0) + if (write(f,buf,i) != i) { + fprintf(stderr, "%s: write failed %s\n", progname, *tf); + 
done(); + } + close(f); + if (sfd > 0) + close(sfd); + return (*tf); +} + +static char * +splice(const char *dir, char *file) +{ + const char *tail; + char *buf; + + if (!strcmp(file, "-")) { + fprintf(stderr, + "%s: can't specify - with other arg directory\n", + progname); + done(); + } + tail = basename(file); + buf = talloc(strlen(dir) + strlen(tail) + 2); + sprintf(buf, "%s/%s", dir, tail); + return (buf); +} + +static void +prepare(int i) +{ + register struct line *p; + register long j; + register long h; + + fseeko(input[i], 0, SEEK_SET); + mcur[i] = mend[i] = NULL; + p = talloc(3*sizeof(line)); + for(j=0; h=readhash(i);) { + p = ralloc(p,(++j+3)*sizeof(line)); + p[j].value = h; + } + len[i] = j; + file[i] = p; +} + +static void +prune(void) +{ + register long i; + register int j; + for(pref=0;pref<len[0]&&pref<len[1]&& + file[0][pref+1].value==file[1][pref+1].value; + pref++ ) ; + for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&& + file[0][len[0]-suff].value==file[1][len[1]-suff].value; + suff++) ; + for(j=0;j<2;j++) { + sfile[j] = file[j]+pref; + slen[j] = len[j]-pref-suff; + for(i=0;i<=slen[j];i++) + sfile[j][i].serial = i; + } +} + +static void +equiv(struct line *a,long n,struct line *b,long m,long *c) +{ + register long i, j; + i = j = 1; + while(i<=n && j<=m) { + if(a[i].value <b[j].value) + a[i++].value = 0; + else if(a[i].value == b[j].value) + a[i++].value = j; + else + j++; + } + while(i <= n) + a[i++].value = 0; + b[m+1].value = 0; + j = 0; + while(++j <= m) { + c[j] = -b[j].serial; + while(b[j+1].value == b[j].value) { + j++; + c[j] = b[j].serial; + } + } + c[j] = -1; +} + +static long +stone(long *a,long n,long *b,register long *c) +{ + register long i, k,y; + long j, l; + long oldc, tc; + long oldl; + k = 0; + c[0] = newcand(0,0,0); + for(i=1; i<=n; i++) { + j = a[i]; + if(j==0) + continue; + y = -b[j]; + oldl = 0; + oldc = c[0]; + do { + if(y <= clist[oldc].y) + continue; + l = search(c, k, y); + if(l!=oldl+1) + oldc = c[l-1]; + if(l<=k) { + 
if(clist[c[l]].y <= y) + continue; + tc = c[l]; + c[l] = newcand(i,y,oldc); + oldc = tc; + oldl = l; + } else { + c[l] = newcand(i,y,oldc); + k++; + break; + } + } while((y=b[++j]) > 0); + } + return(k); +} + +static long +newcand(long x,long y,long pred) +{ + register struct cand *q; + clist = ralloc(clist,++clen*sizeof(cand)); + q = clist + clen -1; + q->x = x; + q->y = y; + q->pred = pred; + return(clen-1); +} + +static long +search(long *c, long k, long y) +{ + register long i, j, l; + long t; + if(clist[c[k]].y<y) /*quick look for typical case*/ + return(k+1); + i = 0; + j = k+1; + while (1) { + l = i + j; + if ((l >>= 1) <= i) + break; + t = clist[c[l]].y; + if(t > y) + j = l; + else if(t < y) + i = l; + else + return(l); + } + return(l+1); +} + +static void +unravel(long p) +{ + register long i; + register struct cand *q; + for(i=0; i<=len[0]; i++) + J[i] = i<=pref ? i: + i>len[0]-suff ? i+len[1]-len[0]: + 0; + for(q=clist+p;q->y!=0;q=clist+q->pred) + J[q->x+pref] = q->y+pref; +} + +/* check does double duty: +1. ferret out any fortuitous correspondences due +to confounding by hashing (which result in "jackpot") +2. 
collect random access indexes to the two files */ + +static void +check_sb(void) +{ + register long i, j; + long jackpot; + off_t ctold, ctnew; + register int c,d; + + if ((input[0] = fopen(file1,"r")) == NULL) { + perror(file1); + done(); + } + if ((input[1] = fopen(file2,"r")) == NULL) { + perror(file2); + fclose(input[0]); + done(); + } + j = 1; + ixold[0] = ixnew[0] = 0; + jackpot = 0; + ctold = ctnew = 0; + for(i=1;i<=len[0];i++) { + if(J[i]==0) { + ixold[i] = ctold += skipline(0); + continue; + } + while(j<J[i]) { + ixnew[j] = ctnew += skipline(1); + j++; + } + if(bflag || wflag || iflag) { + for(;;) { + c = getc(input[0]); + d = getc(input[1]); + ctold++; + ctnew++; + if(bflag && isspace(c) && isspace(d)) { + do { + if(c=='\n') + break; + ctold++; + } while(isspace(c=getc(input[0]))); + do { + if(d=='\n') + break; + ctnew++; + } while(isspace(d=getc(input[1]))); + } else if ( wflag ) { + while( isspace(c) && c!='\n' ) { + c=getc(input[0]); + ctold++; + } + while( isspace(d) && d!='\n' ) { + d=getc(input[1]); + ctnew++; + } + } + if(chrtran(c) != chrtran(d)) { + jackpot++; + J[i] = 0; + if(c!='\n') + ctold += skipline(0); + if(d!='\n') + ctnew += skipline(1); + break; + } + if(c=='\n' || c==EOF) + break; + } + } else { + for(;;) { + ctold++; + ctnew++; + if((c=getc(input[0])) != (d=getc(input[1]))) { + /* jackpot++; */ + J[i] = 0; + if(c!='\n') + ctold += skipline(0); + if(d!='\n') + ctnew += skipline(1); + break; + } + if(c=='\n' || c==EOF) + break; + } + } + ixold[i] = ctold; + ixnew[j] = ctnew; + j++; + } + for(;j<=len[1];j++) { + ixnew[j] = ctnew += skipline(1); + } + fclose(input[0]); + fclose(input[1]); +/* + if(jackpot) + fprintf(stderr, "jackpot\n"); +*/ +} + +static void +check_mb(void) +{ + register long i, j; + long jackpot; + off_t ctold, ctnew; + wint_t wc, wd; + int nc, nd; + char *cc, *cd; + + if ((input[0] = fopen(file1,"r")) == NULL) { + perror(file1); + done(); + } + mcur[0] = mend[0] = NULL; + if ((input[1] = fopen(file2,"r")) == NULL) { + 
perror(file2); + fclose(input[0]); + done(); + } + mcur[1] = mend[1] = NULL; + j = 1; + ixold[0] = ixnew[0] = 0; + jackpot = 0; + ctold = ctnew = 0; + for(i=1;i<=len[0];i++) { + if(J[i]==0) { + ixold[i] = ctold += wskipline(0); + continue; + } + while(j<J[i]) { + ixnew[j] = ctnew += wskipline(1); + j++; + } + if(bflag || wflag || iflag) { + for(;;) { + cc = wcget(0, &wc, &nc); + cd = wcget(1, &wd, &nd); + if(bflag && iswspace(wc) && iswspace(wd)) { + do { + if(wc=='\n') + break; + ctold += nc; + } while(cc = wcget(0, &wc, &nc), + iswspace(wc)); + do { + if(wd=='\n') + break; + ctnew += nd; + } while(cd = wcget(1, &wd, &nd), + iswspace(wd)); + ctold += nc; + ctnew += nd; + } else if ( wflag ) { + ctold += nc; + ctnew += nd; + while( iswspace(wc) && wc!='\n' && cc) { + cc = wcget(0, &wc, &nc); + ctold += nc; + } + while( iswspace(wd) && wd!='\n' && cd) { + cd = wcget(1, &wd, &nd); + ctnew += nd; + } + } else { + ctold += nc; + ctnew += nd; + } + if(chrtran(wc) != chrtran(wd) || + wc == WEOF && wd == WEOF && + (cc == NULL && cd && *cd || + cc && *cc && cd == NULL || + cc && cd && *cc != *cd)) { + jackpot++; + J[i] = 0; + if(wc!='\n') + ctold += wskipline(0); + if(wd!='\n') + ctnew += wskipline(1); + break; + } + if(wc=='\n' || cc == NULL) + break; + } + } else { + for(;;) { + cc = wcget(0, &wc, &nc); + cd = wcget(1, &wd, &nd); + ctold += nc; + ctnew += nd; + if (wc != wd || wc == WEOF && wd == WEOF && + (cc == NULL && cd && *cd || + cc && *cc && cd == NULL || + cc && cd && *cc != *cd)) { + /* jackpot++; */ + J[i] = 0; + if(wc!='\n') + ctold += wskipline(0); + if(wd!='\n') + ctnew += wskipline(1); + break; + } + if(wc=='\n' || cc == NULL) + break; + } + } + ixold[i] = ctold; + ixnew[j] = ctnew; + j++; + } + for(;j<=len[1];j++) { + ixnew[j] = ctnew += wskipline(1); + } + fclose(input[0]); + fclose(input[1]); +/* + if(jackpot) + fprintf(stderr, "jackpot\n"); +*/ +} + +static void +sort(struct line *a,long n) /*shellsort CACM #201*/ +{ + struct line w; + register long j,m 
= 0; + struct line *ai; + register struct line *aim; + long k; + + if (n == 0) + return; + for(j=1;j<=n;j*= 2) + m = 2*j - 1; + for(m/=2;m!=0;m/=2) { + k = n-m; + for(j=1;j<=k;j++) { + for(ai = &a[j]; ai > a; ai -= m) { + aim = &ai[m]; + if(aim < ai) + break; /*wraparound*/ + if(aim->value > ai[0].value || + aim->value == ai[0].value && + aim->serial > ai[0].serial) + break; + w.value = ai[0].value; + ai[0].value = aim->value; + aim->value = w.value; + w.serial = ai[0].serial; + ai[0].serial = aim->serial; + aim->serial = w.serial; + } + } + } +} + +static void +unsort(struct line *f, long l, long *b) +{ + register long *a; + register long i; + a = talloc((l+1)*sizeof(*a)); + for(i=1;i<=l;i++) + a[f[i].serial] = f[i].value; + for(i=1;i<=l;i++) + b[i] = a[i]; + tfree(a); +} + +static long +skipline(int f) +{ + register long i; + register int c; + + for(i=1;(c=getc(input[f]))!='\n';i++) + if (c == EOF) + return(i); + return(i); +} + +static long +wskipline(int f) +{ + long i; + int n; + wint_t wc; + char *cp; + + for (i = 1; cp = wcget(f, &wc, &n), wc != '\n'; i += n) + if (cp == NULL) + return (i); + return (i); +} + +static void +output(void) +{ + long m; + register long i0, i1, j1; + long j0; + if ((input[0] = fopen(file1,"r")) == NULL) { + perror(file1); + done(); + } + if ((input[1] = fopen(file2,"r")) == NULL) { + perror(file2); + fclose(input[0]); + done(); + } + m = len[0]; + J[0] = 0; + J[m+1] = len[1]+1; + if (pflag) { + saveJ = talloc((len[0]+2)*sizeof(*saveJ)); + memcpy(saveJ, J, (len[0]+2)*sizeof(*saveJ)); + } + if(opt!=D_EDIT) for(i0=1;i0<=m;i0=i1+1) { + while(i0<=m&&J[i0]==J[i0-1]+1) i0++; + j0 = J[i0-1]+1; + i1 = i0-1; + while(i1<m&&J[i1+1]==0) i1++; + j1 = J[i1+1]-1; + J[i1] = j1; + change(i0,i1,j0,j1); + } else for(i0=m;i0>=1;i0=i1-1) { + while(i0>=1&&J[i0]==J[i0+1]-1&&J[i0]!=0) i0--; + j0 = J[i0+1]-1; + i1 = i0+1; + while(i1>1&&J[i1-1]==0) i1--; + j1 = J[i1-1]+1; + J[i1] = j1; + change(i1,i0,j1,j0); + } + if(m==0) + change(1,0,1,len[1]); + if 
(opt==D_IFDEF) { + for (;;) { +#define c i0 + c = getc(input[0]); + if (c == EOF) + goto end; + putchar(c); + } +#undef c + } + if (anychange && (opt == D_CONTEXT || opt == D_UNIFIED)) + dump_context_vec(); +end: fclose(input[0]); + fclose(input[1]); +} + +static int +allblank(off_t *f, long a, long b, FILE *lb) +{ + long i; + + if (a > b) + return 1; + for (i = a; i <= b; i++) + if (f[i]-f[i-1] != 1) + return 0; + return 1; +} + +/* + * The following struct is used to record change information when + * doing a "context" diff. (see routine "change" to understand the + * highly mnemonic field names) + */ +struct context_vec { + long a; /* start line in old file */ + long b; /* end line in old file */ + long c; /* start line in new file */ + long d; /* end line in new file */ +}; + +static struct context_vec *context_vec_start, + *context_vec_end, + *context_vec_ptr; + +#define MAX_CONTEXT 129 + +/* indicate that there is a difference between lines a and b of the from file + to get to lines c to d of the to file. + If a is greater than b then there are no lines in the from file involved + and this means that there were lines appended (beginning at b). + If c is greater than d then there are lines missing from the to file. 
+*/ +static void +change(long a,long b,long c,long d) +{ + int ch; + struct stat stbuf; + + if (opt != D_IFDEF && a>b && c>d) + return; + if (Bflag && allblank(ixold,a,b,input[0]) && + allblank(ixnew,c,d,input[1])) + return; + if (anychange == 0) { + anychange = 1; + if(opt == D_CONTEXT || opt == D_UNIFIED) { + if (opt == D_CONTEXT) { + printf("*** %s\t", file1); + stat(file1, &stbuf); + printf("%s--- %s\t", + ctime(&stbuf.st_mtime), file2); + stat(file2, &stbuf); + printf("%s", ctime(&stbuf.st_mtime)); + } else { /* opt == D_UNIFIED */ + printf("--- %s\t", file1); + stat(file1, &stbuf); + printf("%s+++ %s\t", + ctime(&stbuf.st_mtime), file2); + stat(file2, &stbuf); + printf("%s", ctime(&stbuf.st_mtime)); + } + + context_vec_start = talloc(MAX_CONTEXT * + sizeof(*context_vec_start)); + context_vec_end = context_vec_start + MAX_CONTEXT; + context_vec_ptr = context_vec_start - 1; + } + } + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + if(opt == D_CONTEXT || opt == D_UNIFIED) { + /* + * if this new change is within 'context' lines of + * the previous change, just add it to the change + * record. If the record is full or if this + * change is more than 'context' lines from the previous + * change, dump the record, reset it & add the new change. 
+ */ + if ( context_vec_ptr >= context_vec_end-1 || + ( context_vec_ptr >= context_vec_start && + a > (context_vec_ptr->b + 2*context) && + c > (context_vec_ptr->d + 2*context) ) ) + dump_context_vec(); + + context_vec_ptr++; + context_vec_ptr->a = a; + context_vec_ptr->b = b; + context_vec_ptr->c = c; + context_vec_ptr->d = d; + return; + } + switch (opt) { + + case D_NORMAL: + case D_EDIT: + range(a,b,","); + putchar(a>b?'a':c>d?'d':'c'); + if(opt==D_NORMAL) + range(c,d,","); + putchar('\n'); + break; + case D_REVERSE: + putchar(a>b?'a':c>d?'d':'c'); + range(a,b," "); + putchar('\n'); + break; + case D_NREVERSE: + if (a>b) + printf("a%ld %ld\n",b,d-c+1); + else { + printf("d%ld %ld\n",a,b-a+1); + if (!(c>d)) + /* add changed lines */ + printf("a%ld %ld\n",b, d-c+1); + } + break; + } + if(opt == D_NORMAL || opt == D_IFDEF) { + fetch(ixold,a,b,input[0],"< ", 1); + if(a<=b&&c<=d && opt == D_NORMAL) + prints("---\n"); + } + fetch(ixnew,c,d,input[1],opt==D_NORMAL?"> ":"", 0); + if ((opt ==D_EDIT || opt == D_REVERSE) && c<=d) + prints(".\n"); + if (inifdef) { + fprintf(stdout, "#endif %s\n", endifname); + inifdef = 0; + } +} + +static void +range(long a,long b,const char *separator) +{ + printf("%ld", a>b?b:a); + if(a<b || opt==D_UNIFIED) { + printf("%s%ld", separator, opt==D_UNIFIED ? b-a+1 : b); + } +} + +static void +fetch(off_t *f,long a,long b,FILE *lb,const char *s,int oldfile) +{ + register long i, j; + register int c; + register long col; + register long nc; + int oneflag = (*ifdef1!='\0') != (*ifdef2!='\0'); + + /* + * When doing #ifdef's, copy down to current line + * if this is the first file, so that stuff makes it to output. + */ + if (opt == D_IFDEF && oldfile){ + off_t curpos = ftello(lb); + /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ + nc = f[a>b? 
b : a-1 ] - curpos; + for (i = 0; i < nc; i++) { + c = getc(lb); + if (c == EOF) + break; + putchar(c); + } + } + if (a > b) + return; + if (opt == D_IFDEF) { + if (inifdef) + fprintf(stdout, "#else %s%s\n", oneflag && oldfile==1 ? "!" : "", ifdef2); + else { + if (oneflag) { + /* There was only one ifdef given */ + endifname = ifdef2; + if (oldfile) + fprintf(stdout, "#ifndef %s\n", endifname); + else + fprintf(stdout, "#ifdef %s\n", endifname); + } + else { + endifname = oldfile ? ifdef1 : ifdef2; + fprintf(stdout, "#ifdef %s\n", endifname); + } + } + inifdef = 1+oldfile; + } + + for(i=a;i<=b;i++) { + fseeko(lb,f[i-1],SEEK_SET); + nc = f[i]-f[i-1]; + if (opt != D_IFDEF) + prints(s); + col = 0; + for(j=0;j<nc;j++) { + c = getc(lb); + if (c == '\t' && tflag) + do + putchar(' '); + while (++col & 7); + else if (c == EOF) { + if (aflag) + printf("\n\\ No newline at " + "end of file\n"); + else + putchar('\n'); + break; + } else { + putchar(c); + col++; + } + } + } + + if (inifdef && !wantelses) { + fprintf(stdout, "#endif %s\n", endifname); + inifdef = 0; + } +} + +#define POW2 /* define only if HALFLONG is 2**n */ +#define HALFLONG 16 +#define low(x) (x&((1L<<HALFLONG)-1)) +#define high(x) (x>>HALFLONG) + +/* + * hashing has the effect of + * arranging line in 7-bit bytes and then + * summing 1-s complement in 16-bit hunks + */ +static int +readhash(register int f) +{ + register int32_t sum; + register unsigned shift; + register int t; + register int space; + int content; + wint_t wt; + int n; + char *cp; + + sum = 1; + space = 0; + content = 0; + if(!bflag && !wflag) { + if(iflag) { + if (mb_cur_max > 1) { + for (shift = 0; cp = wcget(f, &wt, &n), + wt != '\n'; shift += 7) { + if (cp == NULL) { + if (content) { + missnl(f); + break; + } + return (0); + } + content = 1; + sum += (int32_t)chrtran(wt) << (shift +#ifdef POW2 + &= HALFLONG - 1); +#else + %= HALFLONG); +#endif + } + } else { + for(shift=0;(t=getc(input[f]))!='\n';shift+=7) { + if(t==EOF) { + if (content) 
{ + missnl(f); + break; + } + return(0); + } + content = 1; + sum += (int32_t)chrtran(t) << (shift +#ifdef POW2 + &= HALFLONG - 1); +#else + %= HALFLONG); +#endif + } + } + } else { + for(shift=0;(t=getc(input[f]))!='\n';shift+=7) { + if(t==EOF) { + if (content) { + missnl(f); + break; + } + return(0); + } + content = 1; + sum += (int32_t)t << (shift +#ifdef POW2 + &= HALFLONG - 1); +#else + %= HALFLONG); +#endif + } + } + } else { + if (mb_cur_max > 1) { + for(shift=0;;) { + if ((cp = wcget(f, &wt, &n)) == NULL) { + if (content) { + missnl(f); + break; + } + return(0); + } + content = 1; + switch (wt) { + default: + if (iswspace(wt)) { + space++; + continue; + } + if(space && !wflag) { + shift += 7; + space = 0; + } + sum += (int32_t)chrtran(wt) << (shift +#ifdef POW2 + &= HALFLONG - 1); +#else + %= HALFLONG); +#endif + shift += 7; + continue; + case '\n': + break; + } + break; + } + } else { + for(shift=0;;) { + switch(t=getc(input[f])) { + case EOF: + if (content) { + missnl(f); + break; + } + return(0); + default: + content = 1; + if (isspace(t)) { + space++; + continue; + } + if(space && !wflag) { + shift += 7; + space = 0; + } + sum += (int32_t)chrtran(t) << (shift +#ifdef POW2 + &= HALFLONG - 1); +#else + %= HALFLONG); +#endif + shift += 7; + continue; + case '\n': + break; + } + break; + } + } + } + sum = low(sum) + high(sum); + return((int16_t)low(sum) + (int16_t)high(sum)); +} + +static int +asciifile(FILE *f) +{ + char buf[BUFSIZ]; + register int cnt; + register char *cp; + + fseeko(f, 0, SEEK_SET); + cnt = fread(buf, 1, BUFSIZ, f); + cp = buf; + while (--cnt >= 0) + if (*cp++ == '\0') + return (0); + return (1); +} + + +/* dump accumulated "context" diff changes */ +static void +dump_context_vec(void) +{ + register long a, b = 0, c, d = 0; + register char ch; + register struct context_vec *cvp = context_vec_start; + register long lowa, upb, lowc, upd; + register int do_output; + + if ( cvp > context_vec_ptr ) + return; + + lowa = max(1, cvp->a - 
context); + upb = min(len[0], context_vec_ptr->b + context); + lowc = max(1, cvp->c - context); + upd = min(len[1], context_vec_ptr->d + context); + + if (opt == D_UNIFIED) { + printf("@@ -"); + range(lowa, upb, ","); + printf(" +"); + range(lowc, upd, ","); + printf(" @@"); + if (pflag) + pdump(lowa-1); + printf("\n"); + + while (cvp <= context_vec_ptr) { + a = cvp->a; b = cvp->b; c = cvp->c; d = cvp->d; + + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + switch (ch) { + case 'a': + fetch(ixold,lowa,b,input[0]," ", 0); + fetch(ixnew,c,d,input[1],"+",0); + break; + case 'c': + fetch(ixold,lowa,a-1,input[0]," ", 0); + fetch(ixold,a,b,input[0],"-",0); + fetch(ixnew,c,d,input[1],"+",0); + break; + case 'd': + fetch(ixold,lowa,a-1,input[0]," ", 0); + fetch(ixold,a,b,input[0],"-",0); + break; + } + lowa = b + 1; + cvp++; + } + fetch(ixold, b+1, upb, input[0], " ", 0); + } + + if (opt == D_CONTEXT) { + printf("***************"); + if (pflag) + pdump(lowa-1); + printf("\n*** "); + range(lowa,upb,","); + printf(" ****\n"); + + /* + * output changes to the "old" file. The first loop suppresses + * output if there were no changes to the "old" file (we'll see + * the "old" lines as context in the "new" list). + */ + do_output = 0; + for ( ; cvp <= context_vec_ptr; cvp++) + if (cvp->a <= cvp->b) { + cvp = context_vec_start; + do_output++; + break; + } + + if ( do_output ) { + while (cvp <= context_vec_ptr) { + a = cvp->a; b = cvp->b; c = cvp->c; d = cvp->d; + + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + if (ch == 'a') + fetch(ixold,lowa,b,input[0]," ", 0); + else { + fetch(ixold,lowa,a-1,input[0]," ", 0); + fetch(ixold,a,b,input[0], + ch == 'c' ? "! 
" : "- ", + 0); + } + lowa = b + 1; + cvp++; + } + fetch(ixold, b+1, upb, input[0], " ", 0); + } + + /* output changes to the "new" file */ + printf("--- "); + range(lowc,upd,","); + printf(" ----\n"); + + do_output = 0; + for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) + if (cvp->c <= cvp->d) { + cvp = context_vec_start; + do_output++; + break; + } + + if (do_output) { + while (cvp <= context_vec_ptr) { + a = cvp->a; b = cvp->b; c = cvp->c; d = cvp->d; + + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + if (ch == 'd') + fetch(ixnew,lowc,d,input[1]," ", 0); + else { + fetch(ixnew,lowc,c-1,input[1]," ", 0); + fetch(ixnew,c,d,input[1], + ch == 'c' ? "! " : "+ ", + 0); + } + lowc = d + 1; + cvp++; + } + fetch(ixnew, d+1, upd, input[1], " ", 0); + } + } + + context_vec_ptr = context_vec_start - 1; +} + +/*ARGSUSED*/ +static void +sdone(int signo) +{ + done(); +} + +static char * +wcget(int f, wint_t *wc, int *len) +{ + size_t rest; + int c, i, n; + + i = 0; + rest = mend[f] - mcur[f]; + if (rest && mcur[f] > mbuf[f]) { + do + mbuf[f][i] = mcur[f][i]; + while (i++, --rest); + } else if (incompl[f]) { + incompl[f] = 0; + *wc = WEOF; + mend[f] = mcur[f] = NULL; + return NULL; + } + if (i == 0) { + c = getc(input[f]); + if (c == EOF) { + *wc = WEOF; + mend[f] = mcur[f] = NULL; + return NULL; + } + mbuf[f][i++] = c; + } + if (mbuf[f][0] & 0200) { + while (mbuf[f][i-1] != '\n' && i < mb_cur_max && + incompl[f] == 0) { + c = getc(input[f]); + if (c != EOF) + mbuf[f][i++] = c; + else + incompl[f] = 1; + } + n = mbtowi(wc, mbuf[f], i); + if (n < 0) { + *len = 1; + *wc = WEOF; + } else if (n == 0) { + *len = 1; + *wc = '\0'; + } else + *len = n; + } else { + *wc = mbuf[f][0]; + *len = n = 1; + } + mcur[f] = &mbuf[f][*len]; + mend[f] = &mcur[f][i - *len]; + return mbuf[f]; +} + +static void +missnl(int f) +{ + if (aflag == 0) + fprintf(stderr, "Warning: missing newline at end of file %s\n", + f == 0 ? 
file1 : file2); +} + +/* + * Find and dump the name of the C function with the -p option. The + * search begins at line sa. + */ +static void +pdump(long sa) +{ +#define psize 40 + static char lbuf[psize*MB_LEN_MAX+1]; + char mbuf[MB_LEN_MAX+1]; + int c, i, j; + wchar_t wc; + long a = sa; + + while (a-- > pstart) { + if (saveJ[a+1] == 0) + continue; + fseeko(input[0], ixold[a], SEEK_SET); + i = 0; + do { + if ((c=getc(input[0])) == EOF || c == '\n') + break; + mbuf[i] = c; + } while (++i<mb_cur_max); + if (mb_cur_max>1) { + mbuf[i] = 0; + if (((c=mbuf[0])&0200)==0) + wc = mbuf[0]; + else if (mbtowc(&wc, mbuf, i) < 0) + continue; + } + if ((mb_cur_max>1 && mbuf[0]&0200 ? iswalpha(wc):isalpha(c)) || + c == '$' || c == '_') { + plast = a+1; + for (j = 0; j < i; j++) + lbuf[j] = mbuf[j]; + while (i < sizeof lbuf - 1) { + if ((c=getc(input[0])) == EOF || c == '\n') + break; + lbuf[i++] = c; + } + for (j=0;j<i&&j<psize;) { + if (mb_cur_max==1 || (lbuf[j]&0200) == 0) + j++; + else { + c = mbtowc(NULL, &lbuf[j], i-j); + j += c>0 ? 
c:1; + } + } + lbuf[j] = 0; + break; + } + } + pstart = sa; + if (plast) { + putchar(' '); + for (i = 0; lbuf[i]; i++) + putchar(lbuf[i] & 0377); + } +} diff --git a/diff/diffver.c b/diff/diffver.c @@ -0,0 +1,15 @@ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)diff.sl 1.51 (gritter) 3/15/07"; +/* SLIST */ +/* +diff.c: Sccsid @(#)diff.c 1.24 (gritter) 3/27/05> +diff.h: Sccsid @(#)diff.h 1.15 (gritter) 3/26/05> +diffdir.c: Sccsid @(#)diffdir.c 1.30 (gritter) 1/22/06> +diffreg.c: Sccsid @(#)diffreg.c 1.30 (gritter) 3/15/07> +*/ diff --git a/diff/mkfile b/diff/mkfile @@ -0,0 +1,14 @@ +BIN = diff diffh +diff_OBJ = diff.o diffdir.o diffreg.o diffver.o +diffh_OBJ = diffh.o +INSTALL_BIN = diff +INSTALL_LIB = diffh +INSTALL_MAN1 = diff.1 +LOCAL_CFLAGS = -DDIFFH=\"$LIBDIR/diffh\" +DEPS = libcommon + +<$mkbuild/mk.default + +diff.o: diff.h +diffdir.o: diff.h +diffreg.o: diff.h diff --git a/ed/depsinc.mk b/ed/depsinc.mk @@ -0,0 +1 @@ +ED = $ed_DEPDIR/ed diff --git a/ed/ed.1 b/ed/ed.1 @@ -0,0 +1,1033 @@ +'\" t +.\" Sccsid @(#)ed.1 1.48 (gritter) 6/22/05 +.\" Parts taken from ed(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH ED 1 "6/22/05" "Heirloom Toolchest" "User Commands" +.if t .ds q \(aa +.if n .ds q ' +.SH NAME +ed \- text editor +.SH SYNOPSIS +\fBed\fR [\fB\-\fR\ |\ \fB\-s\fR] [\fB\-p\fI\ prompt\fR] [\fIname\fR] +.SH DESCRIPTION +.I Ed +is the standard text editor. +.PP +If a +.I name +argument is given, +.I ed +simulates an +.I e +command (see below)\| on the named file; that is to say, +the file is read into +.IR ed 's +buffer so that it can be edited. +The optional +.B \- +or +.B \-s +suppresses the printing +of character counts by +.IR e , +.IR r , +and +.I w +commands, +and of the `!' after completion of a shell command. +.PP +With the +.B \-p +option, +the given +.I prompt +string is printed before each command is read. 
+.PP +.I Ed +operates on a copy of any file it is editing; changes made +in the copy have no effect on the file until a +.IR w "" +(write)\| +command is given. +The copy of the text being edited resides +in a temporary file called the +.IR buffer . +.PP +The editor supports format specifications as defined in +.IR fspec (5). +If the terminal is configured to expand tabulators +(as enabled with +.I stty tab3 +or +.IR "stty \-tabs"), +and the first line of the file being edited +contains a format specification, +the +.I t +and +.I s +are interpreted, +that is, tabulators are expanded and lines are truncated +when printing to the terminal. For example, +.RS +<:t\-f s72:> +.sp +.RE +selects FORTRAN format and truncates lines at 72 characters. +No expansion or truncation is performed by +.I ed +when input is typed to the terminal. +.PP +Commands to +.I ed +have a simple and regular structure: zero or +more +.I addresses +followed by a single character +.I command, +possibly +followed by parameters to the command. +These addresses specify one or more lines in the buffer. +Missing addresses are supplied by default. +.PP +In general, only one command may appear on a line. +Certain commands allow the +addition of text to the buffer. +While +.I ed +is accepting text, it is said +to be in +.I "input mode." +In this mode, no commands are recognized; +all input is merely collected. +Input mode is left by typing a period `\fB.\fR' alone at the +beginning of a line. +.PP +.I Ed +supports a limited form of +.I "regular expression" +notation. +A regular expression specifies +a set of strings of characters. +A member of this set of strings is said to be +.I matched +by the regular expression. +In the following specification for regular expressions +the word `character' means any character but newline. +.B /usr/5bin/ed +uses simple regular expressions, +whereas +.BR /usr/5bin/s42/ed , +.BR /usr/5bin/posix/ed , +and +.B /usr/5bin/posix2001/ed +use basic regular expressions. 
+.SS "Simple Regular Expressions" +.IP 1. +Any character except a special character +matches itself. +Special characters are +the regular expression delimiter plus +.RB \e\|[\| . +and sometimes ^\|*\|$. +.IP 2. +A +.B .\& +matches any character. +.IP 3. +A \fB\e\fR followed by any character except a digit +or (\|) {\|} <\|> matches that character. +.IP 4. +A nonempty string +.I s +bracketed +\fB[\fI\|s\|\fB]\fR +(or +\fB[^\fIs\|\fB]\fR) +forms a \fIbracket expression\fR that +matches any character in (or not in) +.I s. +In +.I s, +\e has no special meaning, and ] may only appear as +the first letter. +A substring +\fIa\fB\-\fIb\fR, +with +.I a +and +.I b +in ascending ASCII order, stands for the inclusive +range of ASCII characters. +.IP 5. +A regular expression of form 1-4 followed by \fB*\fR matches a sequence of +0 or more matches of the regular expression. +.IP 6. +A regular expression of form 1-4 +followed by \fB\e{\fIm\fB,\fIn\fB\e}\fR +forms an \fIinterval expression\fR that +matches a sequence of \fIm\fR through \fIn\fR matches, inclusive, +of the regular expression. +The values of \fIm\fR and \fIn\fR must be non-negative +and smaller than 256. +The form \fB\e{\fIm\fB\e}\fR matches exactly \fIm\fR occurrences, +\fB\e{\fIm\fB,\e}\fR matches at least \fIm\fR occurrences. +.IP 7. +The sequence \fB\e<\fR forces the match +to occur only at the beginning of a ``variable'' or ``word''; +that is, either at the beginning of a line, +or just before a letter, digit or underline +and after a character not one of these. +.IP 8. +The sequence \fB\e>\fR matches the end +of a ``variable'' or ``word'', +i.\|e. either the end of the line +or before a character which is neither a letter, +nor a digit, nor the underline character. +.IP 9. +A regular expression, +.I x, +of form 1-11, parenthesized +\fB\e(\fI\|x\|\fB\e)\fR +is called a \fIsubexpression\fR and +matches what +.I x +matches. +.IP 10. 
+A \fB\e\fR followed by a digit +.I n +forms a \fIbackreference\fR and +matches a copy of the string that the +parenthesized regular expression beginning with the +.IR n th +\e( matched. +.IP 11. +A regular expression of form 1-11, +.I x, +followed by a regular expression of form 1-10, +.I y +matches a match for +.I x +followed by a match for +.I y, +with the +.I x +match being as long as possible while still permitting a +.I y +match. +.IP 12. +A regular expression of form 1-11 preceded by \fB^\fR +(or followed by \fB$\fR), is constrained to matches that +begin at the left (or end at the right) end of a line +(\fIanchoring\fR). +.IP 13. +A regular expression of form 1-12 picks out the +longest among the leftmost matches in a line. +.IP 14. +An empty regular expression stands for a copy of the +last regular expression encountered. +.SS "Basic Regular Expressions" +Basic Regular Expressions add the following features +to Simple Regular Expressions: +.IP 15. +The special character \fB*\fR, as described in 5., +and the interval expressions described in 6. +can also be applied to subexpressions +as described in 9. +For POSIX.1-2001 utilities such as +.BR /usr/5bin/posix2001/ed , +subexpressions do not match the empty string +if there is a possible longer match. +.IP 16. +In bracket expressions as described in 4., +the following character sequences are considered special: +.IP +Character class expressions of the form +\fB[:\fIclass\fB:]\fR. +In the C LC_CTYPE locale, +the classes +.sp +.TS +l l l l. +[:alnum:] [:cntrl:] [:lower:] [:space:] +[:alpha:] [:digit:] [:print:] [:upper:] +[:blank:] [:graph:] [:punct:] [:xdigit:] +.TE +.sp +are recognized; +further locale-specific classes may be available. +A character class expression matches any character +that belongs to the given class in the current LC_CTYPE locale. +.IP +Collating symbol expressions of the form +\fB[.\fIc\fB.]\fR, +where \fIc\fR is a collating symbol +in the current LC_COLLATE locale. 
+A collating symbol expression +matches the specified collating symbol. +.IP +Equivalence class expressions of the form +\fB[=\fIc\fB=]\fR, +where \fIc\fR is a collating symbol +in the current LC_COLLATE locale. +An equivalence class expression +matches any character that has the same collating weight +as \fIc\fR. +.PP +Regular expressions are used in addresses to specify +lines and in one command +(see +.I s +below)\| +to specify a portion of a line which is to be replaced. +If it is desired to use one of +the regular expression metacharacters as an ordinary +character, that character may be preceded by `\e'. +This also applies to the character bounding the regular +expression (often `/')\| and to `\e' itself. +.PP +To understand addressing in +.I ed +it is necessary to know that at any time there is a +.I "current line." +Generally speaking, the current line is +the last line affected by a command; however, +the exact effect on the current line +is discussed under the description of +the command. +Addresses are constructed as follows. +.TP +1. +The character `\fB.\fR' addresses the current line. +.TP +2. +The character `\fB$\fR' addresses the last line of the buffer. +.TP +3. +A decimal number +.I n +addresses the +.IR n -th +line of the buffer. +.TP +4. +`\fB\(fm\fIx\fR' addresses the line marked with the name +.IR x , +which must be a lower-case letter. +Lines are marked with the +.I k +command described below. +.TP +5. +A regular expression enclosed in slashes `\fB/\fR' addresses +the line found by searching forward from the current line +and stopping at the first line containing a +string that matches the regular expression. +If necessary the search wraps around to the beginning of the +buffer. +.TP +6. +A regular expression enclosed in queries `\fB?\fR' addresses +the line found by searching backward from the current line +and stopping at the first line containing +a string that matches the regular expression. 
+If necessary +the search wraps around to the end of the buffer. +.TP +7. +An address followed by a plus sign `\fB+\fR' +or a minus sign `\fB\-\fR' followed by a decimal number +specifies that address plus +(resp. minus)\| the indicated number of lines. +The plus sign may be omitted. +.TP +8. +If an address begins with `\fB+\fR' or `\fB\-\fR' +the addition or subtraction is taken with respect to the current line; +e.g. `\-5' is understood to mean `\fB.\fR\-5'. +.TP +9. +If an address ends with `\fB+\fR' or `\fB\-\fR', +then 1 is added (resp. subtracted). +As a consequence of this rule and rule 8, +the address `\-' refers to the line before the current line. +Moreover, +trailing +`+' and `\-' characters +have cumulative effect, so `\-\-' refers to the current +line less 2. +.TP +10. +To maintain compatibility with earlier versions of the editor, +the character `\fB^\fR' in addresses is +equivalent to `\-'. +.PP +Commands may require zero, one, or two addresses. +Commands which require no addresses regard the presence +of an address as an error. +Commands which accept one or two addresses +assume default addresses when insufficient are given. +If more addresses are given than such a command requires, +the last one or two (depending on what is accepted)\| are used. +.PP +Addresses are separated from each other typically by a comma +`\fB,\fR'. +They may also be separated by a semicolon +`\fB;\fR'. +In this case the current line `\fB.\fR' is set to +the previous address before the next address is interpreted. +This feature can be used to determine the starting +line for forward and backward searches (`/', `?')\|. +The second address of any two-address sequence +must correspond to a line following the line corresponding to the first address. +.PP +Omission of the first address causes +the first line to be used with `,', +or the current line with `;', respectively; +if the second address is also omitted, +the last line of the buffer is used. 
+Thus a single `,' specifies the entire contents of the buffer, +and a single `;' specifies the contents +ranging from the current line to the last one. +.PP +In the following list of +.I ed +commands, the default addresses +are shown in parentheses. +The parentheses are not part of +the address, but are used to show that the given addresses are +the default. +.PP +As mentioned, it is generally illegal for more than one +command to appear on a line. +However, most commands may be suffixed by `p', `l', or `n', +in which case +the current line is either +printed, listed, or numbered respectively +in the way discussed below. +.TP 5 +\fR(\|\fI.\|\fR)\fB\|a\fR +.br +.ns +.TP 5 +<text> +.br +.ns +.TP 5 +.B . +.br +The append command reads the given text +and appends it after the addressed line. +`\fB.\fR' is left +on the last line input, if there +were any, otherwise at the addressed line. +Address `0' is legal for this command; text is placed +at the beginning of the buffer. +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.\|\fR)\|\fBc\fR +.br +.ns +.TP 5 +<text> +.br +.ns +.TP 5 +.B . +.br +The change +command deletes the addressed lines, then accepts input +text which replaces these lines. +`\fB.\fR' is left at the last line input; if there were none, +it is left at the line preceding the deleted lines. +For +.BR /usr/5bin/posix2001/ed , +a `0' as the first address is identical to `1'. +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.\|\fR)\|\fBd\fR +The delete command deletes the addressed lines from the buffer. +The line originally after the last line deleted becomes the current line; +if the lines deleted were originally at the end, +the new last line becomes the current line. +.TP 5 +\fBe\ \fIfilename\fR +The edit +command causes the entire contents of the buffer to be deleted, +and then the named file to be read in. +`\fB.\fR' is set to the last line of the buffer. +The number of characters read is typed. 
+`\fIfilename\fR' is remembered for possible use as a default file name +in a subsequent +.I r +or +.I w +command. +If `\fIfilename\fR' is missing, the remembered name is used. +A `\fIfilename\fR' starting with a `\fB!\fR' +causes the output of the shell command following this character +to be read in. +.TP 5 +\fBE\ \fIfilename\fR +This command is the same as +.IR e , +except that no diagnostic results when no +.I w +has been given since the last buffer alteration. +.TP 5 +\fBf\ \fIfilename\fR +The filename command prints the currently remembered file name. +If `\fIfilename\fR' is given, +the currently remembered file name is changed to `\fIfilename\fR'. +.TP 5 +\fR(\fI1\fB,\fI$\fR)\|\fBg/\fIregular expression\fB/\fIcommand list\fR +In the global +command, the first step is to mark every line which matches +the given \fIregular expression\fR. +Then for every such line, the +given \fIcommand list\fR is executed +with `\fB.\fR' initially set to that line. +A single command or the first of multiple commands +appears on the same line with the global command. +All lines of a multi-line list except the last line must be ended with `\e'. +.I A, +.I i, +and +.I c +commands and associated input are permitted; +the `\fB.\fR' terminating input mode may be omitted if it would be on the +last line of the command list. +The commands +.I g +and +.I v +are not permitted in the command list. +.TP 5 +\fR(\fI1\fB,\fI$\fR)\|\fBG/\fIregular expression\fB/\fR +The interactive global command +first marks every line matching the given \fIregular expression\fR. +Then each line is printed +and a command is read and executed for this line. +A single newline character causes the line to remain unchanged, +an isolated `\fB&\fR' repeats the command given for the previous line. +The command can be terminated by an interrupt signal. +.TP 5 +.B h +This command prints a verbose description for the +last error encountered. 
+.TP +.B H +This command acts like the +.I h +command, +but also causes verbose descriptions to be printed +on all following error conditions. +Another +.I H +turns verbose mode off. +.TP 5 +\fR(\|\fI.\|\fR)\|\fBi\fR +.br +.ns +.TP 5 +<text> +.br +.ns +.TP 5 +.B . +.br +This command inserts the given text before the addressed line. +`\fB.\fR' is left at the last line input, or, if there were none, +at the line before the addressed line. +This command differs from the +.I a +command only in the placement of the +text. +For +.BR /usr/5bin/posix2001/ed , +a `0' as the first address is identical to `1'. +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.+1\fR)\|\fBj\fR +This command joins the addressed lines into a single line; +intermediate newlines simply disappear. +`\fB.\fR' is left at the resulting line. +.TP 5 +\fR(\fI.\fR)\|\fBk\fIx\fR +The mark command marks the addressed line with +name +.IR x , +which must be a lower-case letter. +The address form `\(fm\fIx\fR' then addresses this line. +.ne 2.5 +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.\|\fR)\|\fBl\fR +The list command +prints the addressed lines in an unambiguous way: +.B /usr/5bin/ed +prints +non-graphic control characters in three-digit octal; +.BR /usr/5bin/s42/ed , +.BR /usr/5bin/posix/ed , +and +.B /usr/5bin/posix2001/ed +print control characters as C-style escape sequences +or in three-digit octal. +Long lines are folded. +The +.I l +command may be placed on the same line after any non-i/o +command. +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.\|\fR)\|\fBm\fIa\fR +The move command repositions the addressed lines after the line +addressed by +.IR a . +The last of the moved lines becomes the current line. +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.\|\fR)\|\fBn\fR +This command prints lines preceded by their line numbers. +It otherwise acts like the +.I p +command described below. +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.\|\fR)\|\fBp\fR +The print command prints the addressed lines. +`\fB.\fR' +is left at the last line printed. 
+The +.I p +command +may +be placed on the same line after any non-i/o command. +.TP +.B P +This command causes a prompt to be printed +before following commands are read. +The default prompt is a `*' character, +but can be set with the +.I \-p +command line option (which also enables the prompt). +Another +.I P +disables the prompt. +.TP 5 +.B q +The quit command causes +.I ed +to exit. +No automatic write +of a file is done. +.TP 5 +.B Q +This command is the same as +.I q, +except that no diagnostic results when no +.I w +has been given since the last buffer alteration. +.TP 5 +\fR(\fI$\fR)\|\fBr\ \fIfilename\fR +The read command +reads in the given file after the addressed line. +If no file name is given, +the remembered file name, if any, is used +(see +.I e +and +.I f +commands)\|. +The file name is remembered if there was no +remembered file name already. +Address `0' is legal for +.I r +and causes the +file to be read at the beginning of the buffer. +If the read is successful, the number of characters +read is typed. +`\fB.\fR' is left at the last line read in from the file. +A `filename' starting with a `\fB!\fR' +causes the output of the shell command following this character +to be read in. +.TP 5 +\fR(\|\fI.\fB\|,\|\fI.\fR\|)\|\fBs/\fIregular expression\fB/\fIreplacement\fB/\fR or, +.br +.ns +.TP 5 +\fR(\|\fI.\fB\|,\|\fI.\fR\|)\|\fBs/\fIregular expression\fB/\fIreplacement\fB/g\fR or, +.br +.ns +.TP 5 +\fR(\|\fI.\fB\|,\|\fI.\fR\|)\|\fBs/\fIregular expression\fB/\fIreplacement\fB/\fInumber\fR +The substitute command searches each addressed +line for an occurrence of the specified regular expression. +On each line in which a match is found, +all matched strings are replaced by the replacement specified, +if the global replacement indicator +.RB ` g ' +appears after the command. 
+If the global indicator does not appear, only the first occurrence +of the matched string is replaced; +if the \fInumber\fR indicator is given, +the numbered occurrence is replaced. +It is an error for the substitution to fail on all addressed lines. +Any character other than space or new-line +may be used instead of `/' to delimit the regular expression +and the replacement. +`\fB.\fR' is left at the last line substituted. +.IP +An ampersand +.RB ` & ' +appearing in the replacement +is replaced by the string matching the regular expression. +The special meaning of `&' in this context may be +suppressed by preceding it by +.RB ` \e '. +The characters `\|\fB\e\fIn\fR' +where +.I n +is a digit, +are replaced by the text matched by the +.IR n -th +regular subexpression +enclosed between `\e(' and `\e)'. +When +nested, parenthesized subexpressions +are present, +.I n +is determined by counting occurrences of `\e(' starting from the left. +.IP +A substitution string consisting of a single +.RB ` % ' +causes the string given on the previous substitution to be re-used. +.IP +Lines may be split by substituting new-line characters into them. +The new-line in the +replacement string +must be escaped by preceding it by +.RB ` \e '. +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.\|\fR)\|\fBt\|\fIa\fR +This command acts just like the +.I m +command, except that a copy of the addressed lines is placed +after address +.I a +(which may be 0). +`\fB.\fR' is left on the last line of the copy. +.TP 5 +.B u +The undo command restores +the contents of the buffer +before the last command was executed. +If the undo command is given twice, +the current state is restored. +.TP 5 +\fR(\fI1\fB,\fI$\fR)\|\fBv/\fIregular expression\fB/\fIcommand list\fR +This command is the same as the global command +.I g +except that the command list is executed +with `\fB.\fR' initially set to every line +.I except +those +matching the regular expression. 
+.TP 5 +\fR(\fI1\fB,\fI$\fR)\|\fBV/\fIregular expression\fB/\fR +This command is the same as the interactive global command +.I G +except that the commands are read +with `\fB.\fR' initially set to every line +.I except +those +matching the regular expression. +.TP 5 +\fR(\fI1\fB,\fI$\fR)\|\fBw\ \fIfilename\fR +.br +The write command writes the addressed lines onto +the given file. +If the file does not exist, +it is created mode 666 (readable and writable by everyone)\|. +The file name is remembered if there was no +remembered file name already. +If no file name is given, +the remembered file name, if any, is used +(see +.I e +and +.I f +commands)\|. +`\fB.\fR' is unchanged. +If the command is successful, the number of characters written is +printed. +A `filename' starting with a `\fB!\fR' +causes the string following this character +to be executed as a shell command +with the addressed lines as standard input. +.TP +\fR(\fI1\fB,\fI$\fR)\fBW\ \fIfilename\fR +This command is the same as +.I w, +except that the addressed lines are appended to the file. +.TP 5 +\fR(\fI$\fR)\|\fB=\fR +The line number of the addressed line is typed. +`\fB.\fR' is unchanged by this command. +.TP 5 +\fB!\fR<shell command> +The remainder of the line after the `!' is sent +to +.IR sh (1) +to be interpreted as a command. +.RB ` . ' +is unchanged. +If the command starts with a +.RB ` ! ', +the previous command is inserted. +A +.RB ` % ' +causes the current file name to be inserted. +.TP 5 +\fR(\|\fI.+1\fR)\|<newline> +An address alone on a line causes the addressed line to be printed. +A blank line alone is equivalent to `.+1p'; it is useful +for stepping through text. +.PP +The following commands are extensions: +.TP 5 +\fR(\|\fI.\|\fR)\fB\|b\fR[\fIcount\fR] +Prints a screenful of lines, +starting at the addressed one, +and browses forward in the buffer by this amount. 
+With the optional +.I count +argument, the screen size for this and following +.I b +commands is set to the given number of lines. +.TP 5 +.B help +Causes a summary of +.I ed +commands along with short descriptions +to be printed on the terminal. +.TP 5 +.B N +Makes the +.I p +command behave like the +.I n +command and vice-versa. +If given a second time, +the original semantics are restored. +.TP 5 +\fR(\|\fI.\|\fR)\fB\|o\fR[\fIcount\fR] +Prints a screenful of lines centered around the addressed one. +The current line is not changed. +With the optional +.I count +argument, the number of lines printed above and below +for this and following +.I o +commands is set to the given number. +.TP 5 +.B z +Performs the same actions as a +.I w +command followed by a +.I q +command. +.PP +If an interrupt signal is sent, +.I ed +prints a `?' and returns to its command level. +.PP +An input line that consists exactly of the two characters `\e.' +causes a period `.' to be inserted with the +.IR a , +.IR c , +and +.IR i +commands +in +.B /usr/5bin/ed +and +.BR /usr/5bin/s42/ed . +.PP +Some size limitations: +The maximum number of bytes in the buffer +corresponds to the address size; +on machines with 32-bit addressing, +it is 2\ G bytes, +with 64-bit addressing, +it is 9\ E bytes. +The limit on the number of lines depends on the amount of core: +each line takes 2 words. +.PP +If a line contains a NUL character, +regular expressions cannot match beyond this character. +A substitute command deletes a NUL +and all following characters on the line. +NUL characters in command input are discarded. +If an input file does not end with a newline, +.I ed +prints a message and appends one. +.PP +Omission of the `/' character +following the regular expression or the replacement string +to the global and substitute commands +causes the affected lines to be printed. 
+Thus the following commands have the same effect: +.RS +g/pattern g/pattern/p +.br +s/pattern/repl s/pattern/repl/p +.br +s/pattern/ s/pattern//p +.RE +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_COLLATE +Affects the collation order for range expressions, +equivalence classes, and collation symbols +in basic regular expressions. +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +in both simple and basic regular expressions, +the availability and composition of character classes +in basic regular expressions, +and the set of printable characters for the +.I l +command. +.TP +.B TMPDIR +Determines the location of the temporary file +if it contains the name of an accessible directory. +.SH FILES +/var/tmp/e* +.br +/tmp/e* +.br +ed.hup: work is saved here if terminal hangs up +.SH "SEE ALSO" +B. W. Kernighan, +.I +A Tutorial Introduction to the ED Text Editor +.br +B. W. Kernighan, +.I Advanced editing on UNIX +.br +bfs(1), +grep(1), +sed(1), +sh(1) +.SH DIAGNOSTICS +`?name' for inaccessible file; +`?' for +errors in commands, +possibly followed by a verbose description +(see the description for the +.I h +and +.I H +commands above). +.PP +To protect against throwing away valuable work, +a +.I q +or +.I e +command is considered to be in error, unless a +.I w +has occurred since the last buffer change. +A second +.I q +or +.I e +will be obeyed regardless. +.SH NOTES +A +.I !\& +command cannot be subject to a +.I g +command. +.PP +The LC_COLLATE variable has currently no effect. +Ranges in bracket expressions are ordered +as byte values in single-byte locales +and as wide character values in multibyte locales; +equivalence classes match the given character only, +and multi-character collating elements are not available. 
+.PP +For portable programs, restrict textual data +to the US-ASCII character set, +set the LC_CTYPE and LC_COLLATE variables to `C' or `POSIX', +and use the constructs in the second column +instead of the character class expressions as follows: +.RS +.sp +.TS +l l. +[[:alnum:]] [0\-9A\-Za\-z] +[[:alpha:]] [A\-Za\-z] +[[:blank:]] [\fI<tab><space>\fR] +[[:cntrl:]] [^\fI<space>\fR\-~] +[[:digit:]] [0\-9] +[[:graph:]] [!\-~] +[[:lower:]] [a\-z] +[[:print:]] [\fI<space>\fR\-~] +[[:punct:]] [!\-/:\-@[\-`{\-~] +[[:space:]] [\fI<tab><vt><ff><cr><space>\fR] +[[:upper:]] [A\-Z] +[[:xdigit:]] [0\-9a\-fA\-F] +.TE +.sp +.RE +.IR <tab> , +.IR <space> , +.IR <vt> , +.IR <ff> , +and +.I <cr> +indicate inclusion of +a literal tabulator, space, vertical tabulator, formfeed, +or carriage return character, respectively. +Do not put the +.IR <vt> , +.IR <ff> , +and +.I <cr> +characters into the range expression for the +.I space +class unless you actually want to match these characters. diff --git a/ed/ed.c b/ed/ed.c @@ -0,0 +1,2822 @@ +/* + * Editor + */ + +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, July 2003. + */ +/* from Unix 32V /usr/src/cmd/ed.c */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. 
+ * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +#if defined (SU3) +static const char sccsid[] USED = "@(#)ed_su3.sl 1.99 (gritter) 7/27/06"; +#elif defined (SUS) +static const char sccsid[] USED = "@(#)ed_sus.sl 1.99 (gritter) 7/27/06"; +#elif defined (S42) +static const char sccsid[] USED = "@(#)ed_s42.sl 1.99 (gritter) 7/27/06"; +#else /* !SU3, !SUS, !S42 */ +static const char sccsid[] USED = "@(#)ed.sl 1.99 (gritter) 7/27/06"; +#endif /* !SU3, !SUS, !S42 */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <string.h> +#include <stdlib.h> +#include <signal.h> +#include "sigset.h" +#include <termios.h> +#include <setjmp.h> +#include <libgen.h> +#include <inttypes.h> +#include <locale.h> +#include <wchar.h> +#include <ctype.h> +#include <wctype.h> +#include <limits.h> +#include <termios.h> +static int FNSIZE; +static int LBSIZE; +static int RHSIZE; +#define ESIZE 2048 +static int GBSIZE; +#undef EOF +#define EOF -1 +#define puts(s) xxputs(s) +#define getline(t, n) xxgetline(t, n) + +#if (LONG_MAX > 017777777777L) +#define MAXCNT 0777777777777777777777L /* 2^63-1 */ +#else +#define MAXCNT 017777777777L /* 2^31-1 */ +#endif +#define BLKMSK (MAXCNT>>8) /* was 0377 */ + +#define READ 0 +#define WRITE 1 +#define EXIST 2 + +struct tabulator { + struct tabulator *t_nxt; /* next list element */ + const char *t_str; /* tabulator string */ + int t_tab; /* tab stop position */ + int t_rep; /* repetitive tab count */ +}; + +static int peekc; +static int lastc; +static char *savedfile; +static char *file; +static struct stat fstbuf; +static char *linebuf; +static char *rhsbuf; +static char expbuf[ESIZE + 4]; +static long *zero; +static long *undzero; +static long *dot; +static long *unddot; +static long *dol; +static 
long *unddol; +static long *addr1; +static long *addr2; +static char *genbuf; +static long count; +static char *linebp; +static int ninbuf; +static int io; +static int ioeof; +static int pflag; +static char *wrtemp; +static uid_t myuid; +static void (*oldhup)(int); +static void (*oldquit)(int); +static void (*oldpipe)(int); +static int vflag = 1; +static int listf; +static int numbf; +static char *globp; +static int tfile = -1; +static long tline; +static char tfname[64]; +static char ibuff[512]; +static int iblock = -1; +static char obuff[512]; +static int oblock = -1; +static int ichanged; +static int nleft; +static long *names; +static long *undnames; +static int anymarks; +static int subnewa; +static int fchange; +static int wrapp; +static unsigned nlall = 128; +static const char *progname; +static const char *prompt = "*"; +static int Pflag; +static int prhelp; +static const char *prvmsg; +static int lastsig; +static int pipid = -1; +static int readop; +static int status; +static int mb_cur_max; +static int needsub; +static int insub; +static struct tabulator *tabstops; +static int maxlength; +static int rspec; +static int Nflag; +static int bcount = 22; +static int ocount = 11; + +static jmp_buf savej; + +static void usage(char, int); +static void commands(void); +static long *address(void); +static void setdot(void); +static void setall(void); +static void setnoaddr(void); +static void nonzero(void); +static void newline(void); +static void filename(int); +static void exfile(void); +static void onintr(int); +static void onhup(int); +static void onpipe(int); +static void error(const char *); +static void error2(const char *, const char *); +static void errput(const char *, const char *); +static int getchr(void); +static int gettty(void); +static long getnum(void); +static int getfile(void); +static void putfile(void); +static int append(int (*)(void), long *); +static void callunix(void); +static char *readcmd(void); +static void quit(int); +static void 
delete(void); +static void rdelete(long *, long *); +static void gdelete(void); +static char *getline(long, int); +static int putline(void); +static char *getblock(long, long); +static void blkio(long, char *, int); +static void init(void); +static void global(int, int); +static void globrd(char **, int); +static void join(void); +static void substitute(int); +static int compsub(void); +static int getsub(void); +static int dosub(int); +static int place(int, const char *, const char *); +static void move(int); +static void reverse(long *, long *); +static int getcopy(void); +static int execute(int, long *, int); +static void cmplerr(int); +static void doprnt(long *, long *); +static void putd(long); +static void puts(const char *); +static void nlputs(const char *); +static void list(const char *); +static int lstchr(int); +static void putstr(const char *); +static void putchr(int); +static void checkpoint(void); +static void undo(void); +static int maketf(int); +static int creatf(const char *); +static int sopen(const char *, int); +static void sclose(int); +static void fspec(const char *); +static const char *ftok(const char **); +static struct tabulator *tabstring(const char *); +static void freetabs(void); +static void expand(const char *); +static void growlb(const char *); +static void growrhs(const char *); +static void growfn(const char *); +static void help(void); + +#define INIT +#define GETC() getchr() +#define UNGETC(c) (peekc = c) +#define PEEKC() (peekc = getchr()) +#define RETURN(c) return c +#define ERROR(c) cmplerr(c) +static wint_t GETWC(char *); + +#if defined (SUS) || defined (S42) || defined (SU3) + +#include <regex.h> + +#define NBRA 9 + +static char *braslist[NBRA]; +static char *braelist[NBRA]; +static char *loc1, *loc2, *locs; +static int nbra; +static int circf; +static int nodelim; + +static char *compile(char *, char *, const char *, int); +static int step(const char *, const char *); + +#else /* !SUS, !S42, !SU3 */ + +#include <regexp.h> 
+ +#endif /* !SUS, !S42, !SU3 */ + +int +main(int argc, char **argv) +{ + register int i; + void (*oldintr)(int); + + progname = basename(argv[0]); +#if defined (SUS) || defined (S42) || defined (SU3) + setlocale(LC_COLLATE, ""); +#endif + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + myuid = getuid(); + oldquit = sigset(SIGQUIT, SIG_IGN); + oldhup = sigset(SIGHUP, SIG_IGN); + oldintr = sigset(SIGINT, SIG_IGN); + if (sigset(SIGTERM, SIG_IGN) != SIG_IGN) + sigset(SIGTERM, quit); + oldpipe = sigset(SIGPIPE, onpipe); + argv++; + while (argc > 1 && **argv=='-') { + if ((*argv)[1] == '\0') { + vflag = 0; + goto next; + } else if ((*argv)[1] == '-' && (*argv)[2] == '\0') { + argv++; + argc--; + break; + } + letter: switch((*argv)[1]) { + + case 's': + vflag = 0; + break; + + case 'q': + sigset(SIGQUIT, SIG_DFL); + vflag = 1; + break; + + case 'p': + if ((*argv)[2]) + prompt = &(*argv)[2]; + else if (argv[1]) { + prompt = argv[1]; + argv++; + argc--; + } else + usage((*argv)[1], 1); + Pflag = 1; + goto next; + + default: + usage((*argv)[1], 0); + } + if ((*argv)[2]) { + (*argv)++; + goto letter; + } + next: argv++; + argc--; + } + + growfn("no space"); + if (argc>1) { + i = -1; + do + if (++i >= FNSIZE) + growfn("maximum of characters in " + "file names reached"); + while (savedfile[i] = (*argv)[i]); + globp = "e"; + } + names = malloc(26*sizeof *names); + undnames = malloc(26*sizeof *undnames); + zero = malloc(nlall*sizeof *zero); + if ((undzero = malloc(nlall*sizeof *undzero)) == NULL) + puts("no memory for undo"); + growlb("no space"); + growrhs("no space"); + init(); + if (oldintr != SIG_IGN) + sigset(SIGINT, onintr); + if (oldhup != SIG_IGN) + sigset(SIGHUP, onhup); + setjmp(savej); + if (lastsig) { + sigrelse(lastsig); + lastsig = 0; + } + commands(); + quit(0); + /*NOTREACHED*/ + return 0; +} + +static void +usage(char c, int misarg) +{ + if (c) { + write(2, progname, strlen(progname)); + if (misarg) + write(2, ": option requires an argument -- ", 33); + 
else + write(2, ": illegal option -- ", 20); + write(2, &c, 1); + write(2, "\n", 1); + } + write(2, "usage: ", 7); + write(2, progname, strlen(progname)); + write(2, " [- | -s] [-p string] [file]\n", 29); + exit(2); +} + +static void +commands(void) +{ + register long *a1; + register int c; + int n; + + for (;;) { + if (pflag) { + pflag = 0; + addr1 = addr2 = dot; + goto print; + } + if (Pflag && globp == NULL) + write(1, prompt, strlen(prompt)); + addr1 = 0; + addr2 = 0; + switch (c = getchr()) { + case ',': + case ';': + addr2 = c == ',' ? zero+1 : dot; + if (((peekc = getchr()) < '0' || peekc > '9') && + peekc != ' ' && peekc != '\t' && + peekc != '+' && peekc != '-' && + peekc != '^' && peekc != '?' && + peekc != '/' && peekc != '$' && + peekc != '.' && peekc != '\'') { + addr1 = addr2; + a1 = dol; + goto loop; + } + break; + default: + peekc = c; + } + do { + addr1 = addr2; + if ((a1 = address())==0) { + c = getchr(); + break; + } + loop: addr2 = a1; + if ((c=getchr()) == ';') { + c = ','; + dot = a1; + } + } while (c==','); + if (addr1==0) + addr1 = addr2; + switch(c) { + + case 'a': + setdot(); + newline(); + checkpoint(); + append(gettty, addr2); + continue; + + case 'c': +#if defined (SU3) + if (addr1 == zero && addr1+1 <= dol) { + if (addr1 == addr2) + addr2++; + addr1++; + } +#endif /* SU3 */ + delete(); + append(gettty, addr1-1); +#if defined (SUS) || defined (SU3) + if (dot == addr1-1 && addr1 <= dol) + dot = addr1; +#endif /* SUS || SU3 */ + continue; + + case 'd': + delete(); + continue; + + case 'E': + fchange = 0; + c = 'e'; + case 'e': + setnoaddr(); + if (vflag && fchange) { + fchange = 0; + error("warning: expecting `w'"); + } + filename(c); + init(); + addr2 = zero; + goto caseread; + + case 'f': + setnoaddr(); + filename(c); + puts(savedfile); + continue; + + case 'g': + global(1, 0); + continue; + + case 'G': + global(1, 1); + continue; + + case 'H': + prhelp = !prhelp; + /*FALLTHRU*/ + + case 'h': + if ((peekc = getchr()) == 'e') { + peekc = 
0; + if (getchr() != 'l' || getchr() != 'p' || + getchr() != '\n') + error("illegal suffix"); + setnoaddr(); + help(); + continue; + } + newline(); + setnoaddr(); + if (prvmsg) + puts(prvmsg); + continue; + + case 'i': + setdot(); +#if defined (SU3) + if (addr1 == zero) { + if (addr1 == addr2) + addr2++; + addr1++; + if (dol != zero) + nonzero(); + } else +#endif /* SU3 */ + nonzero(); + newline(); + checkpoint(); + append(gettty, addr2-1); + if (dot == addr2-1) + dot++; + continue; + + + case 'j': + if (addr2==0) { + addr1 = dot; + addr2 = dot+1; + } + setdot(); + newline(); + nonzero(); + checkpoint(); + if (addr1 != addr2) + join(); + continue; + + case 'k': + if ((c = getchr()) < 'a' || c > 'z') + error("mark not lower case"); + newline(); + setdot(); + nonzero(); + names[c-'a'] = *addr2 & ~01; + anymarks |= 01; + continue; + + case 'm': + move(0); + continue; + + case '\n': + if (addr2==0) + addr2 = dot+1; + addr1 = addr2; + goto print; + + case 'n': + numbf = 1; + newline(); + goto print; + + case 'N': + newline(); + setnoaddr(); + Nflag = !Nflag; + continue; + + case 'b': + case 'o': + n = getnum(); + newline(); + setdot(); + nonzero(); + if (n >= 0) { + if (c == 'b') + bcount = n; + else + ocount = n; + } + if (c == 'b') { + a1 = addr2+bcount > dol ? dol : addr2 + bcount; + doprnt(addr1, a1); + dot = a1; + } else { + a1 = addr2+ocount > dol ? 
dol : addr2 + ocount; + doprnt(addr2-ocount<zero+1?zero+1:addr2-ocount, a1); + dot = addr2; + } + continue; + + case 'l': + listf++; + case 'p': + newline(); + print: + setdot(); + nonzero(); + doprnt(addr1, addr2); + dot = addr2; + continue; + + case 'P': + setnoaddr(); + newline(); + Pflag = !Pflag; + continue; + + case 'Q': + fchange = 0; + case 'q': + setnoaddr(); + newline(); + quit(0); + + case 'r': + filename(c); + caseread: + if ((io = sopen(file, READ)) < 0) { + lastc = '\n'; + error2("cannot open input file", file); + } + ioeof = 0; + setall(); + ninbuf = 0; + if (c == 'r') + checkpoint(); + n = zero != dol; + rspec = (c == 'e' || !n) && file[0] != '!'; + append(getfile, addr2); + rspec = 0; + exfile(); + fchange = n; + continue; + + case 's': + setdot(); + nonzero(); + substitute(globp!=0); + continue; + + case 't': + move(1); + continue; + + case 'u': + setdot(); + newline(); + if (unddot == NULL) + error("nothing to undo"); + undo(); + continue; + + case 'v': + global(0, 0); + continue; + + case 'V': + global(0, 1); + continue; + + case 'W': + wrapp++; + case 'w': + write: + setall(); + if (zero != dol) + nonzero(); + filename(c); + if(!wrapp || + ((io = open(file,O_WRONLY|O_APPEND)) == -1) || + ((lseek(io, 0, SEEK_END)) == -1)) { + struct stat st; + if (lstat(file, &st) == 0 && + (st.st_mode&S_IFMT) == S_IFREG && + st.st_nlink == 1 && + (myuid==0 || myuid==st.st_uid)) { + char *cp, *tp; + int nio; + if ((io = sopen(file, EXIST)) < 0) + error("cannot create output file"); + if ((wrtemp = malloc(strlen(file)+8)) == NULL) + error("out of memory"); + for (cp = file, tp = wrtemp; *cp; cp++) + *tp++ = *cp; + while (tp > wrtemp && tp[-1] != '/') + tp--; + for (cp = "\7XXXXXX"; *cp; cp++) + *tp++ = *cp; + *tp = '\0'; + if ((nio = mkstemp(wrtemp)) < 0) { + free(wrtemp); + wrtemp = NULL; + ftruncate(io, 0); + } else { + close(io); + io = nio; + } + } else { + if ((io = sopen(file, WRITE)) < 0) + error("cannot create output file"); + } + } + if (zero != dol) { + 
ioeof = 0; + wrapp = 0; + putfile(); + } + exfile(); + if (addr1==zero+1 && addr2==dol || addr1==addr2 && dol==zero) + fchange = 0; + if (c == 'z') + quit(0); + continue; + + case 'z': + if ((peekc=getchr()) != '\n') + error("illegal suffix"); + setnoaddr(); + goto write; + + case '=': + setall(); + newline(); + putd((addr2-zero)&MAXCNT); + putchr('\n'); + continue; + + case '!': + callunix(); + continue; + + case EOF: + return; + + } + error("unknown command"); + } +} + +static long * +address(void) +{ + register long *a1; + register int minus, c; + int n, relerr; + + minus = 0; + a1 = 0; + for (;;) { + c = getchr(); + if ('0'<=c && c<='9') { + n = 0; + do { + n *= 10; + n += c - '0'; + } while ((c = getchr())>='0' && c<='9'); + peekc = c; + if (a1==0) + a1 = zero; + if (minus<0) + n = -n; + a1 += n; + minus = 0; + continue; + } + relerr = 0; + if (a1 || minus) + relerr++; + switch(c) { + case ' ': + case '\t': + continue; + + case '+': + minus++; + if (a1==0) + a1 = dot; + continue; + + case '-': + case '^': + minus--; + if (a1==0) + a1 = dot; + continue; + + case '?': + case '/': + compile(NULL, expbuf, &expbuf[ESIZE], c); + a1 = dot; + for (;;) { + if (c=='/') { + a1++; + if (a1 > dol) + a1 = zero; + } else { + a1--; + if (a1 < zero) + a1 = dol; + } + if (execute(0, a1, 0)) + break; + if (a1==dot) + error("search string not found"); + } + break; + + case '$': + a1 = dol; + break; + + case '.': + a1 = dot; + break; + + case '\'': + if ((c = getchr()) < 'a' || c > 'z') + error("mark not lower case"); + for (a1=zero; a1<=dol; a1++) + if (names[c-'a'] == (*a1 & ~01)) + break; + break; + + default: + peekc = c; + if (a1==0) + return(0); + a1 += minus; + if (a1<zero || a1>dol) + error("line out of range"); + return(a1); + } + if (relerr) + error("bad number"); + } +} + +static void +setdot(void) +{ + if (addr2 == 0) + addr1 = addr2 = dot; + if (addr1 > addr2) + error("bad range"); +} + +static void +setall(void) +{ + if (addr2==0) { + addr1 = zero+1; + addr2 = dol; + 
if (dol==zero) + addr1 = zero; + } + setdot(); +} + +static void +setnoaddr(void) +{ + if (addr2) + error("Illegal address count"); +} + +static void +nonzero(void) +{ + if (addr1<=zero || addr2>dol) + error("line out of range"); +} + +static void +newline(void) +{ + register int c; + + if ((c = getchr()) == '\n') + return; + if (c=='p' || c=='l' || c=='n') { + pflag++; + if (c=='l') + listf++; + else if (c=='n') + numbf = 1; + if (getchr() == '\n') + return; + } + error("illegal suffix"); +} + +static void +filename(int comm) +{ + register char *p1, *p2; + register int c, i; + + count = 0; + c = getchr(); + if (c=='\n' || c==EOF) { + p1 = savedfile; + if (*p1==0 && comm!='f') + error("illegal or missing filename"); + p2 = file; + while (*p2++ = *p1++) + ; + return; + } + if (c!=' ') + error("no space after command"); + while ((c = getchr()) == ' ') + ; + if (c=='\n') + error("illegal or missing filename"); + i = 0; + do { + if (i >= FNSIZE) + growfn("maximum of characters in file names reached"); + file[i++] = c; + if (c==' ' && file[0] != '!' 
|| c==EOF) + error("illegal or missing filename"); + } while ((c = getchr()) != '\n'); + file[i++] = 0; + if ((savedfile[0]==0 || comm=='e' || comm=='f') && file[0] != '!') { + p1 = savedfile; + p2 = file; + while (*p1++ = *p2++) + ; + } +} + +static void +exfile(void) +{ + sclose(io); + io = -1; + if (wrtemp) { + extern int rename(const char *, const char *); + if (rename(wrtemp, file) < 0) + error("cannot create output file"); + if (myuid == 0) + chown(file, fstbuf.st_uid, fstbuf.st_gid); + chmod(file, fstbuf.st_mode & 07777); + free(wrtemp); + wrtemp = NULL; + } + if (vflag) { + putd(count); + putchr('\n'); + } +} + +static void +onintr(int signo) +{ + lastsig = signo; + putchr('\n'); + lastc = '\n'; + if (readop) { + puts("\007read may be incomplete - beware!\007"); + fchange = 0; + } + error("interrupt"); +} + +static void +onhup(int signo) +{ + if (dol > zero && fchange) { + addr1 = zero+1; + addr2 = dol; + io = creat("ed.hup", 0666); + if (io < 0) { + char *home = getenv("HOME"); + if (home) { + char *fn = malloc(strlen(home) + 10); + if (fn) { + strcpy(fn, home); + strcat(fn, "/ed.hup"); + io = creat(fn, 0666); + } + } + } + if (io >= 0) + putfile(); + } + fchange = 0; + status = 0200 | signo; + quit(0); +} + +static void +onpipe(int signo) +{ + lastsig = signo; + error("write or open on pipe failed"); +} + +static void +error(const char *s) +{ + error2(s, NULL); +} + +static void +error2(const char *s, const char *fn) +{ + register int c; + + wrapp = 0; + listf = 0; + numbf = 0; + errput(s, fn); + count = 0; + if (lseek(0, 0, SEEK_END) > 0) + status = 2; + pflag = 0; + if (globp) + lastc = '\n'; + globp = 0; + peekc = lastc; + if(lastc) + while ((c = getchr()) != '\n' && c != EOF) + ; + if (io > 0) { + sclose(io); + io = -1; + } + if (wrtemp) { + unlink(wrtemp); + free(wrtemp); + wrtemp = NULL; + } + longjmp(savej, 1); +} + +static void +errput(const char *s, const char *fn) +{ + prvmsg = s; + if (fn) { + putchr('?'); + puts(fn); + } else + puts("?"); + if 
(prhelp) + puts(s); +} + +static int +getchr(void) +{ + char c; + if (lastc=peekc) { + peekc = 0; + return(lastc); + } + if (globp) { + if ((lastc = *globp++) != 0) + return(lastc); + globp = 0; + return(EOF); + } + if (read(0, &c, 1) <= 0) + return(lastc = EOF); + lastc = c; + return(lastc); +} + +static int +gettty(void) +{ + register int c, i; + register char *gf; + + i = 0; + gf = globp; + while ((c = getchr()) != '\n') { + if (c==EOF) { + if (gf) + peekc = c; + return(c); + } + if (c == 0) + continue; + if (i >= LBSIZE) + growlb("line too long"); + linebuf[i++] = c; + } + if (i >= LBSIZE-2) + growlb("line too long"); + linebuf[i++] = 0; + if (linebuf[0]=='.' && linebuf[1]==0) + return(EOF); +#if !defined (SUS) && !defined (SU3) + if (linebuf[0]=='\\' && linebuf[1]=='.' && linebuf[2]==0) + linebuf[0]='.', linebuf[1]=0; +#endif + return(0); +} + +static long +getnum(void) +{ + char scount[20]; + int i; + + i = 0; + while ((peekc=getchr()) >= '0' && peekc <= '9' && i < sizeof scount) { + scount[i++] = peekc; + peekc = 0; + } + scount[i] = '\0'; + return i ? atol(scount) : -1; +} + +static int +getfile(void) +{ + register int c, i, j; + static int nextj; + + i = 0; + j = nextj; + do { + if (--ninbuf < 0) { + if (ioeof || (ninbuf=read(io, genbuf, LBSIZE)-1) < 0) { + if (ioeof == 0 && ninbuf < -1) { + puts("input error"); + status = 1; + } + if (i > 0) { + puts("'\\n' appended"); + c = '\n'; + ioeof = 1; + goto wrc; + } + return(EOF); + } + j = 0; + } + c = genbuf[j++]&0377; + wrc: if (i >= LBSIZE) { + lastc = '\n'; + growlb("line too long"); + } + linebuf[i++] = c ? 
c : '\n'; + count++; + } while (c != '\n'); + linebuf[--i] = 0; + nextj = j; + if (rspec && dot == zero) + fspec(linebuf); + if (maxlength && i > maxlength) { + putstr("line too long: lno = "); + putd((dot - zero+1)&MAXCNT); + putchr('\n'); + } + return(0); +} + +static void +putfile(void) +{ + long *a1; + int n; + register char *fp, *lp; + register int nib; + + nib = 512; + fp = genbuf; + a1 = addr1; + do { + lp = getline(*a1++, 0); + if (maxlength) { + for (n = 0; lp[n]; n++); + if (n > maxlength) { + putstr("line too long: lno = "); + putd((a1-1 - zero)&MAXCNT); + putchr('\n'); + } + } + for (;;) { + if (--nib < 0) { + n = fp-genbuf; + if(write(io, genbuf, n) != n) + error("write error"); + nib = 511; + fp = genbuf; + } + count++; + if ((*fp++ = *lp++) == 0) { + fp[-1] = '\n'; + break; + } else if (fp[-1] == '\n') + fp[-1] = '\0'; + } + } while (a1 <= addr2); + n = fp-genbuf; + if(write(io, genbuf, n) != n) + error("write error"); +} + +static int +append(int (*f)(void), long *a) +{ + register long *a1, *a2, *rdot; + int nline, tl; + + nline = 0; + dot = a; + while ((*f)() == 0) { + if ((dol-zero)+1 >= nlall) { + long *ozero = zero; + nlall += 512; + if ((zero = realloc(zero, nlall*sizeof *zero))==NULL) { + lastc = '\n'; + zero = ozero; + error("out of memory for append"); + } + dot += zero - ozero; + dol += zero - ozero; + addr1 += zero - ozero; + addr2 += zero - ozero; + if (unddot) { + unddot += zero - ozero; + unddol += zero - ozero; + } + if (undzero) { + ozero = undzero; + if ((undzero = realloc(undzero, + nlall*sizeof *undzero)) == 0) { + puts("no memory for undo"); + free(ozero); + } + } + } + tl = putline(); + nline++; + a1 = ++dol; + a2 = a1+1; + rdot = ++dot; + while (a1 > rdot) + *--a2 = *--a1; + *rdot = tl; + } + return(nline); +} + +static void +callunix(void) +{ + char *line; + void (*savint)(int); + pid_t pid, rpid; + int retcode; + + setnoaddr(); + line = readcmd(); + if ((pid = fork()) == 0) { + sigset(SIGHUP, oldhup); + sigset(SIGQUIT, 
oldquit); + sigset(SIGPIPE, oldpipe); + execl(SHELL, "sh", "-c", line, NULL); + _exit(0100); + } else if (pid < 0) + error("fork failed - try again"); + savint = sigset(SIGINT, SIG_IGN); + while ((rpid = wait(&retcode)) != pid && rpid != -1) + ; + sigset(SIGINT, savint); + if (vflag) + puts("!"); +} + +#define cmadd(c) ((i>=cmsize ? \ + ((line=realloc(line,cmsize+=128)) == 0 ? \ + (error("line too long"),0) : 0, 0) \ + : 0), line[i++]=(c)) + +static char * +readcmd(void) +{ + static char *line, *prev; + static int cmsize, pvsize; + char *pp; + int c, mod = 0, i; + + i = 0; + if ((c = getchr()) == '!') { + for (pp = prev; *pp; pp++) + line[i++] = *pp; + mod = 1; + c = getchr(); + } + while (c != '\n' && c != EOF) { + if (c == '\\') { + c = getchr(); + if (c != '%') + cmadd('\\'); + cmadd(c); + } else if (c == '%') { + for (pp = savedfile; *pp; pp++) + cmadd(*pp); + mod = 1; + } else + cmadd(c); + c = getchr(); + } + cmadd('\0'); + if (pvsize < cmsize && (prev = realloc(prev, pvsize=cmsize)) == 0) + error("line too long"); + strcpy(prev, line); + if (mod) + nlputs(line); + return line; +} + +static void +quit(int signo) +{ + lastsig = signo; + if (vflag && fchange) { + fchange = 0; + error("warning: expecting `w'"); + } + if (wrtemp) + unlink(wrtemp); + unlink(tfname); + exit(status); +} + +static void +delete(void) +{ + setdot(); + newline(); + nonzero(); + checkpoint(); + rdelete(addr1, addr2); +} + +static void +rdelete(long *ad1, long *ad2) +{ + register long *a1, *a2, *a3; + + a1 = ad1; + a2 = ad2+1; + a3 = dol; + dol -= a2 - a1; + do { + *a1++ = *a2++; + } while (a2 <= a3); + a1 = ad1; + if (a1 > dol) + a1 = dol; + dot = a1; + fchange = 1; +} + +static void +gdelete(void) +{ + register long *a1, *a2, *a3; + + a3 = dol; + for (a1=zero+1; (*a1&01)==0; a1++) + if (a1>=a3) + return; + for (a2=a1+1; a2<=a3;) { + if (*a2&01) { + a2++; + dot = a1; + } else + *a1++ = *a2++; + } + dol = a1-1; + if (dot>dol) + dot = dol; + fchange = 1; +} + +static char * +getline(long 
tl, int nulterm) +{ + register char *bp, *lp; + register long nl; + + lp = linebuf; + bp = getblock(tl, READ); + nl = nleft; + tl &= ~0377; + while (*lp++ = *bp++) { + if (lp[-1] == '\n' && nulterm) { + lp[-1] = '\0'; + break; + } + if (--nl == 0) { + bp = getblock(tl+=0400, READ); + nl = nleft; + } + } + return(linebuf); +} + +static int +putline(void) +{ + register char *bp, *lp; + register long nl; + long tl; + + fchange = 1; + lp = linebuf; + tl = tline; + bp = getblock(tl, WRITE); + nl = nleft; + tl &= ~0377; + while (*bp = *lp++) { + if (*bp++ == '\n' && insub) { + *--bp = 0; + linebp = lp; + break; + } + if (--nl == 0) { + bp = getblock(tl+=0400, WRITE); + nl = nleft; + } + } + nl = tline; + tline += (((lp-linebuf)+03)>>1)&(MAXCNT-1); + return(nl); +} + +static char * +getblock(long atl, long iof) +{ + register long bno, off; + + bno = (atl>>8)&BLKMSK; + off = (atl<<1)&0774; + if (bno >= BLKMSK) { + lastc = '\n'; + error("temp file too big"); + } + nleft = 512 - off; + if (bno==iblock) { + ichanged |= iof; + return(ibuff+off); + } + if (bno==oblock) + return(obuff+off); + if (iof==READ) { + if (ichanged) + blkio(iblock, ibuff, 1); + ichanged = 0; + iblock = bno; + blkio(bno, ibuff, 0); + return(ibuff+off); + } + if (oblock>=0) + blkio(oblock, obuff, 1); + oblock = bno; + return(obuff+off); +} + +static void +blkio(long b, char *buf, int wr) +{ + lseek(tfile, b<<9, SEEK_SET); + if ((wr ? 
write(tfile, buf, 512) : read (tfile, buf, 512)) != 512) { + status = 1; + error("I/O error on temp file"); + } +} + +static void +init(void) +{ + register long *markp; + + tline = 2; + for (markp = names; markp < &names[26]; markp++) + *markp = 0; + for (markp = undnames; markp < &undnames[26]; markp++) + *markp = 0; + subnewa = 0; + anymarks = 0; + iblock = -1; + oblock = -1; + ichanged = 0; + tfile = maketf(tfile); + dot = dol = zero; + unddot = NULL; +} + +static void +global(int k, int ia) +{ + register int c; + register long *a1; + static char *globuf; + char mb[MB_LEN_MAX+1]; + int spflag = 0; + + if (globp) + error("multiple globals not allowed"); + setall(); + nonzero(); + if ((c=GETWC(mb))=='\n') + error("incomplete global expression"); + compile(NULL, expbuf, &expbuf[ESIZE], c); + if (!ia) { + globrd(&globuf, EOF); + if (globuf[0] == '\n') + globuf[0] = 'p', globuf[1] = '\n', globuf[2] = '\0'; + } else { + newline(); + spflag = pflag; + pflag = 0; + } + checkpoint(); + for (a1=zero; a1<=dol; a1++) { + *a1 &= ~01; + if (a1>=addr1 && a1<=addr2 && execute(0, a1, 0)==k) + *a1 |= 01; + } + /* + * Special case: g/.../d (avoid n^2 algorithm) + */ + if (!ia && globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') { + gdelete(); + return; + } + for (a1=zero; a1<=dol; a1++) { + if (*a1 & 01) { + *a1 &= ~01; + dot = a1; + if (ia) { + puts(getline(*a1, 0)); + if ((c = getchr()) == EOF) + error("command expected"); + if (c == 'a' || c == 'c' || c == 'i') + error("a, i, or c not allowed in G"); + else if (c == '&') { + if ((c = getchr()) != '\n') + error("end of line expected"); + if (globuf == 0 || *globuf == 0) + error("no remembered command"); + } else if (c == '\n') { + a1 = zero; + continue; + } else + globrd(&globuf, c); + } + globp = globuf; + commands(); + globp = NULL; + a1 = zero; + } + } + if (ia) + pflag = spflag; +} + +static void +globrd(char **globuf, register int c) +{ + register int i; + + if (*globuf == 0 && (*globuf = malloc(GBSIZE=256)) == 0) + 
error("global too long"); + i = 0; + if (c != EOF) + (*globuf)[i++] = c; + while ((c = getchr()) != '\n') { + if (c==EOF) + error("incomplete global expression"); + if (c=='\\') { + c = getchr(); + if (c!='\n') + (*globuf)[i++] = '\\'; + } + (*globuf)[i++] = c; + if (i>=GBSIZE-4 && (*globuf=realloc(*globuf,GBSIZE+=256)) == 0) + error("global too long"); + } + (*globuf)[i++] = '\n'; + (*globuf)[i++] = 0; +} + +static void +join(void) +{ + register int i, j; + register long *a1; + + j = 0; + for (a1=addr1; a1<=addr2; a1++) { + i = getline(*a1, 0) - linebuf; + while (genbuf[j] = linebuf[i++]) + if (j++ >= LBSIZE-2) + growlb("line too long"); + } + i = 0; + j = 0; + while (linebuf[i++] = genbuf[j++]) + ; + *addr1 = putline(); + if (addr1<addr2) + rdelete(addr1+1, addr2); + dot = addr1; +} + +static void +substitute(int inglob) +{ + register long *markp; + register long *a1; + intptr_t nl; + int gsubf; + + checkpoint(); + gsubf = compsub(); + insub = 1; + for (a1 = addr1; a1 <= addr2; a1++) { + long *ozero; + if (execute(0, a1, 1)==0) + continue; + inglob |= dosub(gsubf < 2); + if (gsubf) { + int i = 1; + + while (*loc2) { + if (execute(1, NULL, 1)==0) + break; + inglob |= dosub(gsubf == -1 || ++i == gsubf); + } + } + subnewa = putline(); + *a1 &= ~01; + if (anymarks) { + for (markp = names; markp < &names[26]; markp++) + if (*markp == *a1) + *markp = subnewa; + } + *a1 = subnewa; + ozero = zero; + nl = append(getsub, a1); + nl += zero-ozero; + a1 += nl; + addr2 += nl; + } + insub = 0; + if (inglob==0) + error("no match"); +} + +static int +compsub(void) +{ + register int seof, c, i; + static char *oldrhs; + static int orhssz; + char mb[MB_LEN_MAX+1]; + + if ((seof = GETWC(mb)) == '\n' || seof == ' ') + error("illegal or missing delimiter"); + nodelim = 0; + compile(NULL, expbuf, &expbuf[ESIZE], seof); + i = 0; + for (;;) { + c = GETWC(mb); + if (c=='\\') { + if (i >= RHSIZE-2) + growrhs("replacement string too long"); + rhsbuf[i++] = c; + c = GETWC(mb); + } else if 
(c=='\n') { + if (globp && *globp) { + if (i >= RHSIZE-2) + growrhs("replacement string too long"); + rhsbuf[i++] = '\\'; + } + else if (nodelim) + error("illegal or missing delimiter"); + else { + peekc = c; + pflag++; + break; + } + } else if (c==seof) + break; + for (c = 0; c==0 || mb[c]; c++) { + if (i >= RHSIZE-2) + growrhs("replacement string too long"); + rhsbuf[i++] = mb[c]; + } + } + rhsbuf[i++] = 0; + if (rhsbuf[0] == '%' && rhsbuf[1] == 0) { + if (orhssz == 0) + error("no remembered replacement string"); + strcpy(rhsbuf, oldrhs); + } else { + if (orhssz < RHSIZE && + (oldrhs = realloc(oldrhs, orhssz=RHSIZE)) == 0) + error("replacement string too long"); + strcpy(oldrhs, rhsbuf); + } + if ((peekc = getchr()) == 'g') { + peekc = 0; + newline(); + return(-1); + } else if (peekc >= '0' && peekc <= '9') { + c = getnum(); + if (c < 1 || c > LBSIZE) + error("invalid count"); + newline(); + return c; + } + newline(); + return(0); +} + +static int +getsub(void) +{ + register char *p1, *p2; + + p1 = linebuf; + if ((p2 = linebp) == 0) + return(EOF); + while (*p1++ = *p2++) + ; + linebp = 0; + return(0); +} + +static int +dosub(int really) +{ + register char *lp, *sp; + register int i, j, k; + int c; + + if (!really) + goto copy; + i = 0; + j = 0; + k = 0; + while (&linebuf[i] < loc1) + genbuf[j++] = linebuf[i++]; + while (c = rhsbuf[k++]&0377) { + if (c=='&') { + j = place(j, loc1, loc2); + continue; + } else if (c == '\\') { + c = rhsbuf[k++]&0377; + if (c >='1' && c < nbra+'1') { + j = place(j, braslist[c-'1'], braelist[c-'1']); + continue; + } + } + if (j >= LBSIZE) + growlb("line too long"); + genbuf[j++] = c; + } + i = loc2 - linebuf; + loc2 = j + linebuf; +#if defined (SUS) || defined (SU3) || defined (S42) + if (loc1 == &linebuf[i]) { + int n; + wchar_t wc; + if (mb_cur_max > 1 && (n = mbtowc(&wc, loc2, mb_cur_max)) > 0) + loc2 += n; + else + loc2++; + } +#endif /* SUS || SU3 || S42 */ + while (genbuf[j++] = linebuf[i++]) + if (j >= LBSIZE) + growlb("line 
too long"); + if (really) { + lp = linebuf; + sp = genbuf; + } else { + copy: sp = linebuf; + lp = genbuf; + } + while (*lp++ = *sp++) + ; + return really; +} + +static int +place(register int j, register const char *l1, register const char *l2) +{ + + while (l1 < l2) { + genbuf[j++] = *l1++; + if (j >= LBSIZE) + growlb("line too long"); + } + return(j); +} + +static void +move(int cflag) +{ + register long *adt, *ad1, *ad2; + + setdot(); + nonzero(); + if ((adt = address())==0) + error("illegal move destination"); + newline(); + checkpoint(); + if (cflag) { + long *ozero; + intptr_t delta; + ad1 = dol; + ozero = zero; + append(getcopy, ad1++); + ad2 = dol; + delta = zero - ozero; + ad1 += delta; + adt += delta; + } else { + ad2 = addr2; + for (ad1 = addr1; ad1 <= ad2;) + *ad1++ &= ~01; + ad1 = addr1; + } + ad2++; + if (adt<ad1) { + dot = adt + (ad2-ad1); + if ((++adt)==ad1) + return; + reverse(adt, ad1); + reverse(ad1, ad2); + reverse(adt, ad2); + } else if (adt >= ad2) { + dot = adt++; + reverse(ad1, ad2); + reverse(ad2, adt); + reverse(ad1, adt); + } else + error("illegal move destination"); + fchange = 1; +} + +static void +reverse(register long *a1, register long *a2) +{ + register int t; + + for (;;) { + t = *--a2; + if (a2 <= a1) + return; + *a2 = *a1; + *a1++ = t; + } +} + +static int +getcopy(void) +{ + if (addr1 > addr2) + return(EOF); + getline(*addr1++, 0); + return(0); +} + +static int +execute(int gf, long *addr, int subst) +{ + register char *p1, *p2, c; + + for (c=0; c<NBRA; c++) { + braslist[c&0377] = 0; + braelist[c&0377] = 0; + } + if (gf) { + if (circf) + return(0); + p1 = linebuf; + p2 = genbuf; + while (*p1++ = *p2++) + ; + locs = p1 = loc2; + } else { + if (addr==zero) + return(0); + p1 = getline(*addr, 1); + locs = 0; + } + needsub = subst; + return step(p1, expbuf); +} + +static void +cmplerr(int c) +{ + const char *msg; + +#if !defined (SUS) && !defined (S42) && !defined (SU3) + expbuf[0] = 0; +#endif + switch (c) { + case 11: + msg = 
"Range endpoint too large"; + break; + case 16: + msg = "bad number"; + break; + case 25: + msg = "`\\digit' out of range"; + break; + case 36: + msg = "illegal or missing delimiter"; + break; + case 41: + msg = "no remembered search string"; + break; + case 42: + msg = "'\\( \\)' imbalance"; + break; + case 43: + msg = "Too many `\\(' s"; + break; + case 44: + msg = "more than 2 numbers given"; + break; + case 45: + msg = "'\\}' expected"; + break; + case 46: + msg = "first number exceeds second"; + break; + case 49: + msg = "'[ ]' imbalance"; + break; + case 50: + msg = "regular expression overflow"; + break; + case 67: + msg = "illegal byte sequence"; + break; + default: + msg = "regular expression error"; + break; + } + error(msg); +} + +static void +doprnt(long *bot, long *top) +{ + long *a1; + + a1 = bot; + do { + if (numbf ^ Nflag) { + putd(a1-zero); + putchr('\t'); + } + nlputs(getline(*a1++, 0)); + } while (a1 <= top); + pflag = 0; + listf = 0; + numbf = 0; +} + +static void +putd(long c) +{ + register int r; + + r = c%10; + c /= 10; + if (c) + putd(c); + putchr(r + '0'); +} + +static void +nlputs(register const char *sp) +{ + if (listf) + list(sp); + else if (tabstops) + expand(sp); + else + puts(sp); +} + +static void +puts(register const char *sp) +{ + while (*sp) { + if (*sp != '\n') + putchr(*sp++ & 0377); + else + sp++, putchr('\0'); + } + putchr('\n'); +} + +static void +list(const char *lp) +{ + int col, n; + wchar_t c; + + col = numbf ^ Nflag ? 8 : 0; + while (*lp) { + if (mb_cur_max > 1 && *lp&0200) + n = mbtowc(&c, lp, mb_cur_max); + else { + n = 1; + c = *lp&0377; + } + if (col+1 >= 72) { + col = 0; + putchr('\\'); + putchr('\n'); + } + if (n<0 || +#if defined (SUS) || defined (S42) || defined (SU3) + c == '\\' || +#endif /* SUS || S42 || SU3 */ + !(mb_cur_max>1 ? 
iswprint(c) : isprint(c))) { + if (n<0) + n = 1; + while (n--) + col += lstchr(*lp++&0377); + } else if (mb_cur_max>1) { + col += wcwidth(c); + while (n--) + putchr(*lp++&0377); + } else { + putchr(*lp++&0377); + col++; + } + } +#if defined (SUS) || defined (S42) || defined (SU3) + putchr('$'); +#endif + putchr('\n'); +} + +static int +lstchr(int c) +{ + int cad = 1, d; + +#if !defined (SUS) && !defined (S42) && !defined (SU3) + if (c=='\t') { + c = '>'; + goto esc; + } + if (c=='\b') { + c = '<'; + esc: + putchr('-'); + putchr('\b'); + putchr(c); + } else if (c == '\n') { + putchr('\\'); + putchr('0'); + putchr('0'); + putchr('0'); + cad = 4; +#else /* !SUS, !S42, !SU3 */ + if (c == '\n') + c = '\0'; + if (c == '\\') { + putchr('\\'); + putchr('\\'); + cad = 2; + } else if (c == '\a') { + putchr('\\'); + putchr('a'); + cad = 2; + } else if (c == '\b') { + putchr('\\'); + putchr('b'); + cad = 2; + } else if (c == '\f') { + putchr('\\'); + putchr('f'); + cad = 2; + } else if (c == '\r') { + putchr('\\'); + putchr('r'); + cad = 2; + } else if (c == '\t') { + putchr('\\'); + putchr('t'); + cad = 2; + } else if (c == '\v') { + putchr('\\'); + putchr('v'); + cad = 2; +#endif /* !SUS, !S42, !SU3 */ + } else { + putchr('\\'); + putchr(((c&~077)>>6)+'0'); + c &= 077; + d = c & 07; + putchr(c > d ? 
((c-d)>>3)+'0' : '0'); + putchr(d+'0'); + cad = 4; + } + return cad; +} + +static void +putstr(const char *s) +{ + while (*s) + putchr(*s++); +} + +static char line[70]; +static char *linp = line; + +static void +putchr(int ac) +{ + register char *lp; + register int c; + + lp = linp; + c = ac; + *lp++ = c; + if(c == '\n' || lp >= &line[64]) { + linp = line; + write(1, line, lp-line); + return; + } + linp = lp; +} + +static void +checkpoint(void) +{ + long *a1, *a2; + + if (undzero && globp == NULL) { + for (a1 = zero+1, a2 = undzero+1; a1 <= dol; a1++, a2++) + *a2 = *a1; + unddot = &undzero[dot-zero]; + unddol = &undzero[dol-zero]; + for (a1 = names, a2 = undnames; a1 < &names[26]; a1++, a2++) + *a2 = *a1; + } +} + +#define swap(a, b) (t = a, a = b, b = t) + +static void +undo(void) +{ + long *t; + + if (undzero == NULL) + error("no undo information saved"); + swap(zero, undzero); + swap(dot, unddot); + swap(dol, unddol); + swap(names, undnames); +} + +static int +maketf(int fd) +{ + char *tmpdir; + + if (fd == -1) { + if ((tmpdir = getenv("TMPDIR")) == NULL || + (fd = creatf(tmpdir)) < 0) + if ((fd = creatf("/var/tmp")) < 0 && + (fd = creatf("/tmp")) < 0) + error("cannot create temporary file"); + } else + ftruncate(fd, 0); /* blkio() will seek to 0 anyway */ + return fd; +} + +static int +creatf(const char *tmpdir) +{ + if (strlen(tmpdir) >= sizeof tfname - 9) + return -1; + strcpy(tfname, tmpdir); + strcat(tfname, "/eXXXXXX"); + return mkstemp(tfname); +} + +static int +sopen(const char *fn, int rdwr) +{ + int pf[2], fd = -1; + + if (fn[0] == '!') { + fn++; + if (pipe(pf) < 0) + error("write or open on pipe failed"); + switch (pipid = fork()) { + case 0: + if (rdwr == READ) + dup2(pf[1], 1); + else + dup2(pf[0], 0); + close(pf[0]); + close(pf[1]); + sigset(SIGHUP, oldhup); + sigset(SIGQUIT, oldquit); + sigset(SIGPIPE, oldpipe); + execl(SHELL, "sh", "-c", fn, NULL); + _exit(0100); + default: + close(pf[rdwr == READ ? 1 : 0]); + fd = pf[rdwr == READ ? 
0 : 1]; + break; + case -1: + error("fork failed - try again"); + } + } else if (rdwr == READ) + fd = open(fn, O_RDONLY); + else if (rdwr == EXIST) + fd = open(fn, O_WRONLY); + else /*if (rdwr == WRITE)*/ + fd = creat(fn, 0666); + if (fd >= 0 && rdwr == READ) + readop = 1; + if (fd >= 0) + fstat(fd, &fstbuf); + return fd; +} + +static void +sclose(int fd) +{ + int status; + + close(fd); + if (pipid >= 0) { + while (wait(&status) != pipid); + pipid = -1; + } + readop = 0; +} + +static void +fspec(const char *lp) +{ + struct termios ts; + const char *cp; + + freetabs(); + maxlength = 0; + if (tcgetattr(1, &ts) < 0 +#ifdef TAB3 + || (ts.c_oflag&TAB3) == 0 +#endif + ) + return; + while (lp[0]) { + if (lp[0] == '<' && lp[1] == ':') + break; + lp++; + } + if (lp[0]) { + lp += 2; + while ((cp = ftok(&lp)) != NULL) { + switch (*cp) { + case 't': + freetabs(); + if ((tabstops = tabstring(&cp[1])) == NULL) + goto err; + break; + case 's': + maxlength = atoi(&cp[1]); + break; + case 'm': + case 'd': + case 'e': + break; + case ':': + if (cp[1] == '>') { + if (tabstops == NULL) + if ((tabstops = tabstring("0")) + == NULL) + goto err; + return; + } + /*FALLTHRU*/ + default: + err: freetabs(); + maxlength = 0; + errput("PWB spec problem", NULL); + return; + } + } + } +} + +static const char * +ftok(const char **lp) +{ + const char *cp; + + while (**lp && **lp != ':' && (**lp == ' ' || **lp == '\t')) + (*lp)++; + cp = *lp; + while (**lp && **lp != ':' && **lp != ' ' && **lp != '\t') + (*lp)++; + return cp; +} + +static struct tabulator * +repetitive(int repetition) +{ + struct tabulator *tp, *tabspec; + int col, i; + + if ((tp = tabspec = calloc(1, sizeof *tp)) == NULL) + return NULL; + tp->t_rep = repetition; + if (repetition > 0) { + for (col = 1+repetition, i = 0; i < 22; col += repetition) { + if ((tp->t_nxt = calloc(1, sizeof *tp)) == NULL) + return NULL; + tp = tp->t_nxt; + tp->t_tab = col; + } + } + return tabspec; +} + +#define blank(c) ((c) == ' ' || (c) == '\t') + 
+static struct tabulator * +tablist(const char *s) +{ + struct tabulator *tp, *tabspec; + char *x; + int prev = 0, val; + + if ((tp = tabspec = calloc(1, sizeof *tp)) == NULL) + return NULL; + for (;;) { + while (*s == ',') + s++; + if (*s == '\0' || blank(*s) || *s == ':') + break; + val = strtol(s, &x, 10); + if (*s == '+') + val += prev; + prev = val; + if (*s == '-' || (*x != ',' && !blank(*x) && *x != ':' && + *x != '\0')) + return NULL; + s = x; + if ((tp->t_nxt = calloc(1, sizeof *tp)) == NULL) + return NULL; + tp = tp->t_nxt; + tp->t_tab = val; + } + return tabspec; +} + +static struct tabulator * +tabstring(const char *s) +{ + const struct { + const char *c_nam; + const char *c_str; + } canned[] = { + { "a", "1,10,16,36,72" }, + { "a2", "1,10,16,40,72" }, + { "c", "1,8,12,16,20,55" }, + { "c2", "1,6,10,14,49" }, + { "c3", "1,6,10,14,18,22,26,30,34,38,42,46,50,54,58,62,67" }, + { "f", "1,7,11,15,19,23" }, + { "p", "1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61" }, + { "s", "1,10,55" }, + { "u", "1,12,20,44" }, + { 0, 0 } + }; + + int i, j; + + if (s[0] == '-') { + if (s[1] >= '0' && s[1] <= '9' && ((i = atoi(&s[1])) != 0)) + return repetitive(i); + for (i = 0; canned[i].c_nam; i++) { + for (j = 0; canned[i].c_nam[j]; j++) + if (s[j+1] != canned[i].c_nam[j]) + break; + if ((s[j+1]=='\0' || s[j+1]==':' || blank(s[j+1])) && + canned[i].c_nam[j] == '\0') + return tablist(canned[i].c_str); + } + return NULL; + } else + return tablist(s); +} + +static void +freetabs(void) +{ + struct tabulator *tp; + + tp = tabstops; + while (tp) { + tabstops = tp->t_nxt; + free(tp); + tp = tabstops; + } +} + +static void +expand(const char *s) +{ + struct tabulator *tp = tabstops; + int col = 0, n = 1, m, tabcnt = 0, nspc; + wchar_t wc; + + while (*s) { + nspc = 0; + switch (*s) { + case '\n': + putchr('\0'); + s++; + continue; + case '\t': + if (tp) { + if (tp->t_rep) { + if (col % tp->t_rep == 0) { + nspc++; + col++; + } + while (col % tp->t_rep) { + nspc++; + col++; + } + 
break; + } + while (tp && (col>tp->t_tab || tp->t_tab == 0)) + tp = tp->t_nxt; + if (tp && col == tp->t_tab) { + nspc++; + col++; + tp = tp->t_nxt; + } + if (tp) { + while (col < tp->t_tab) { + nspc++; + col++; + } + tp = tp->t_nxt; + break; + } + } + tabcnt = 1; + nspc++; + break; + default: + if (mb_cur_max>1 && (n=mbtowc(&wc, s, mb_cur_max))>0) { + if ((m = wcwidth(wc)) > 0) + col += m; + } else { + col++; + n = 1; + } + } + if (maxlength && col > maxlength) { + putstr("\ntoo long"); + break; + } + if (nspc) { + while (nspc--) + putchr(' '); + s++; + } else + while (n--) + putchr(*s++); + } + if (tabcnt) + putstr("\ntab count"); + putchr('\n'); +} + +static wint_t +GETWC(char *mb) +{ + int c, n; + + n = 1; + mb[0] = c = GETC(); + mb[1] = '\0'; + if (mb_cur_max > 1 && c&0200 && c != EOF) { + int m; + wchar_t wc; + + while ((m = mbtowc(&wc, mb, mb_cur_max)) < 0 && n<mb_cur_max) { + mb[n++] = c = GETC(); + mb[n] = '\0'; + if (c == '\n' || c == EOF) + break; + } + if (m != n) + ERROR(67); + return wc; + } else + return c; +} + +static void +growlb(const char *msg) +{ + char *olb = linebuf; + int i; + + LBSIZE += 512; + if ((linebuf = realloc(linebuf, LBSIZE)) == NULL || + (genbuf = realloc(genbuf, LBSIZE)) == NULL) + error(msg); + if (linebuf != olb) { + loc1 += linebuf - olb; + loc2 += linebuf - olb; + for (i = 0; i < NBRA; i++) { + if (braslist[i]) + braslist[i] += linebuf - olb; + if (braelist[i]) + braelist[i] += linebuf - olb; + } + } +} + +static void +growrhs(const char *msg) +{ + RHSIZE += 256; + if ((rhsbuf = realloc(rhsbuf, RHSIZE)) == NULL) + error(msg); +} + +static void +growfn(const char *msg) +{ + FNSIZE += 64; + if ((savedfile = realloc(savedfile, FNSIZE)) == NULL || + (file = realloc(file, FNSIZE)) == NULL) + error(msg); + if (FNSIZE == 64) + file[0] = savedfile[0] = 0; +} + +#if defined (SUS) || defined (S42) || defined (SU3) +union ptrstore { + void *vp; + char bp[sizeof (void *)]; +}; + +static void * +fetchptr(const char *bp) +{ + union ptrstore 
u; + int i; + + for (i = 0; i < sizeof (void *); i++) + u.bp[i] = bp[i]; + return u.vp; +} + +static void +storeptr(void *vp, char *bp) +{ + union ptrstore u; + int i; + + u.vp = vp; + for (i = 0; i < sizeof (void *); i++) + bp[i] = u.bp[i]; +} + +#define add(c) ((i>=LBSIZE ? (growlb("regular expression overflow"),0) : 0), \ + genbuf[i++] = (c)) + +#define copy(s) { \ + int m; \ + for (m = 0; m==0 || s[m]; m++) \ + add(s[m]); \ +} + +static char * +compile(char *unused, char *ep, const char *endbuf, int seof) +{ + INIT + int c, d, i; + regex_t *rp; + char *op; + char mb[MB_LEN_MAX+1]; + + op = ep; + ep += 2; + if ((rp = fetchptr(ep)) == NULL) { + if ((rp = calloc(1, sizeof *rp)) == NULL) + ERROR(50); + storeptr(rp, ep); + } + ep += sizeof (void *); + i = 0; + nbra = 0; + do { + if ((c = GETWC(mb)) == seof) + add('\0'); + else if (c == '\\') { + copy(mb); + c = GETWC(mb); + if (c == '(') + nbra++; + goto normchar; + } else if (c == '[') { + add(c); + d = EOF; + do { + c = GETWC(mb); + if (c == EOF || c == '\n') + ERROR(49); + copy(mb); + if (d=='[' && (c==':' || c=='.' 
|| c=='=')) { + d = c; + do { + c = GETWC(mb); + if (c == EOF || c == '\n') + ERROR(49); + copy(mb); + } while (c != d || PEEKC() != ']'); + c = GETWC(mb); + copy(mb); + c = EOF; + } + d = c; + } while (c != ']'); + } else { + if (c == EOF || c == '\n') { + if (c == '\n') + UNGETC(c); + mb[0] = c = '\0'; + } + if (c == '\0') + nodelim = 1; + normchar: copy(mb); + } + } while (genbuf[i-1] != '\0'); + if (genbuf[0]) { + int reflags = 0; + +#ifdef REG_ANGLES + reflags |= REG_ANGLES; +#endif +#if defined (SU3) && defined (REG_AVOIDNULL) + reflags |= REG_AVOIDNULL; +#endif + if (op[0]) + regfree(rp); + op[0] = 0; + switch (regcomp(rp, genbuf, reflags)) { + case 0: + break; + case REG_ESUBREG: + ERROR(25); + /*NOTREACHED*/ + case REG_EBRACK: + ERROR(49); + /*NOTREACHED*/ + case REG_EPAREN: + ERROR(42); + /*NOTREACHED*/ + case REG_BADBR: + case REG_EBRACE: + ERROR(45); + /*NOTREACHED*/ + case REG_ERANGE: + ERROR(11); + /*NOTREACHED*/ + case REG_ESPACE: + ERROR(50); + /*NOTREACHED*/ + default: + ERROR(-1); + } + op[0] = 1; + circf = op[1] = genbuf[0] == '^'; + } else if (op[0]) { + circf = op[1]; + } else + ERROR(41); + return ep + sizeof (void *); +} + +static int +step(const char *lp, const char *ep) +{ + regex_t *rp; + regmatch_t bralist[NBRA+1]; + int eflag = 0; + int res; + int i; + + rp = fetchptr(&ep[2]); + if (ep[0] == 0) + return 0; + if (locs) + eflag |= REG_NOTBOL; + if ((res = regexec(rp, lp, needsub? 
NBRA+1 : 0, bralist, eflag)) == 0 && + needsub) { + loc1 = (char *)lp + bralist[0].rm_so; + loc2 = (char *)lp + bralist[0].rm_eo; + for (i = 1; i <= NBRA; i++) { + if (bralist[i].rm_so != -1) { + braslist[i-1] = (char *)lp + bralist[i].rm_so; + braelist[i-1] = (char *)lp + bralist[i].rm_eo; + } else + braslist[i-1] = braelist[i-1] = NULL; + } + } + return res == 0; +} +#endif /* SUS || S42 || SU3 */ + +static void +help(void) +{ + const char *desc[] = { + "(.)a append up to .", + "(.)b[n] browse n lines", + "(.,.)c change up to .", + "(.,.)d delete lines", + "e [file] edit file", + "E [file] force edit", + "f [file] print or set file", + "(1,$)g/RE/cmd global cmd", + "(1,$)G/RE/ interactive global", + "h print last error", + "H toggle error messages", + "help print this screen", + "(.)i insert up to .", + "(.,.+1)j join lines", + "(.)kx mark line with x", + "(.,.)l list lines", + "(.,.)ma move lines to a", + "(.,.)n number lines", + "N revert n and p", + "(.)o[n] show n lines of context", + "(.,.)p print lines", + "P toggle prompt", + "q quit", + "Q force quit", + "($)r read file", + "(.,.)s/RE/repl/ search and replace", + "(.,.)s/RE/rp/g replace all occurrences", + "(.,.)s/RE/rp/n replace n-th occurrence", + "(.,.)ta transfer lines to a", + "u undo last change", + "(1,$)v/RE/cmd reverse global", + "(1,$)V/RE/ reverse i/a global", + "(1,$)w [file] write file", + "(1,$)W [file] append to file", + "z write buffer and quit", + "($)= print line number", + "!command execute shell command", + "(.+1)<newline> print one line", + "/RE find RE forwards", + "?RE find RE backwards", + "1 first line", + ". 
current line", + "$ last line", + ", 1,$", + "; .,$", + NULL + }; + char line[100]; + int c, half, i, k; + + half = (sizeof desc / sizeof *desc) / 2; + for (i = 0; i < half && desc[i]; i++) { + c = 0; + for (k = 0; desc[i][k]; k++) + line[c++] = desc[i][k]; + if (desc[i+half]) { + while (c < 40) + line[c++] = ' '; + for (k = 0; desc[i+half][k]; k++) + line[c++] = desc[i+half][k]; + } + line[c] = 0; + puts(line); + } +} diff --git a/ed/mkfile b/ed/mkfile @@ -0,0 +1,8 @@ +BIN = ed +OBJ = ed.o +LOCAL_CFLAGS = -DSU3 -DSHELL=\"$SHELL\" +INSTALL_BIN = ed +INSTALL_MAN1 = ed.1 +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/expr/expr.1 b/expr/expr.1 @@ -0,0 +1,211 @@ +.\" +.\" Sccsid @(#)expr.1 1.16 (gritter) 2/3/05 +.\" Parts taken from expr(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. 
AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH EXPR 1 "2/3/05" "Heirloom Toolchest" "User Commands" +.SH NAME +expr \- evaluate arguments as an expression +.SH SYNOPSIS +.B expr +.I arguments +\&.\|.\|. +.SH DESCRIPTION +The arguments are taken as an expression. +After evaluation, the result is written on the standard output. +Each token of the expression is a separate argument. +.PP +The operators and keywords are listed below. +The list is in order of increasing precedence, +with equal precedence operators grouped. +.TP +.I expr | expr +yields the first +.I expr +if it is neither null nor `0', otherwise +yields the second +.I expr. +.TP +.I expr & expr +yields the first +.I expr +if neither +.I expr +is null or `0', otherwise yields `0'. +.TP +.I expr relop expr +where +.I relop is one of +< <= = != >= >, +yields `1' if the indicated comparison is true, `0' if false. +The comparison is numeric if both +.I expr +are integers, otherwise lexicographic. +.TP +.IR expr " + " expr +.br +.IR expr " - " expr +.br +addition or subtraction of the arguments. +.TP +.IR expr " * " expr +.br +.IR expr " / " expr +.br +.IR expr " % " expr +.br +multiplication, division, or remainder of the arguments. 
+.TP +.IR expr " : " expr +The matching operator compares the string first argument +with the regular expression second argument. +Regular expression syntax is the same as that of +.IR ed (1); +.B /usr/5bin/expr +uses simple regular expressions, +.BR /usr/5bin/posix/expr , +.BR /usr/5bin/posix2001/expr , +and +.B /usr/5bin/s42/expr +use basic regular expressions. +The +\fB\\(\|.\|.\|.\|\\)\fP +pattern symbols can be used to select a portion of the +first argument. +Otherwise, +the matching operator yields the number of characters matched +(`0' on failure). +.TP +.RI match " expr expr" +Same as +.IR expr " : " expr . +.TP +.RI ( " expr " ) +parentheses for grouping. +.TP +.I string +Yields itself +unless it is part of a larger expression. +With +.BR /usr/5bin/posix/expr +and +.BR /usr/5bin/posix2001/expr , +all +.I strings +that form valid decimal numbers +are converted to the canonical form. +.PP +The following operators are supported only by +.B /usr/5bin/s42/expr +or if the +.B SYSV3 +environment variable is set: +.TP +.RI length " string" +Returns the number of characters in +.IR string . +.TP +.RI substr " string index count" +Returns a string that consists of +.I count +characters +beginning at position +.I index +of +.I string +(starting at 1). +.TP +.RI index " string set" +Returns the index in +.I string +(starting at 1) of the first occurrence +of one of the characters in +.IR set , +or 0 if no character is found. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See locale(7). +.TP +.B LC_COLLATE +Sets the collation sequence for string comparison, +and for range expressions, +equivalence classes, and collation symbols +in basic regular expressions. +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters in regular expressions, +for the match, length, substr, and index operators, +and the availability and composition of character classes +in basic regular expressions. +.TP +.B SYSV3 +Enables some additional operators as described above. 
+.SH EXAMPLES +.PP +To add 1 to the Shell variable +.IR a : +.IP +a=\`expr $a + 1\` +.PP +To find the filename part (least significant part) +of the pathname stored in variable +.I a, +which may or may not contain `/': +.IP +expr "$a" : \'.*/\e(\^.*\e)\' \'\^|\' "$a" +.LP +Note the quoted Shell metacharacters. +.\" Historic example, not to be deleted and useful just because of its errors +Also note that this example generates wrong results +if the result of the substitution is `0' +or if `$a' equals one of the +.I expr +operators. +Be sure that your code avoids such problems +and use +.IR basename (1) +to actually cut out filename parts. +.SH "SEE ALSO" +ed(1), sh(1), test(1) +.SH DIAGNOSTICS +.I Expr +returns the following exit codes: +.PP + 0 if the expression is neither null nor `0', +.br + 1 if the expression +is null or `0', +.br + 2 for invalid expressions. +.SH NOTES +Integers are treated as 64-bit, 2's complement numbers. diff --git a/expr/expr.y b/expr/expr.y @@ -0,0 +1,546 @@ +/* from Unix 7th Edition /usr/src/cmd/expr.y */ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. 
+ * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +%{ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +#if defined (S42) +static const char sccsid[] USED = "@(#)expr_s42.sl 1.28 (gritter) 5/29/05"; +static int sus = 0; +#elif defined (SU3) +static const char sccsid[] USED = "@(#)expr_su3.sl 1.28 (gritter) 5/29/05"; +static int sus = 3; +#elif defined (SUS) +static const char sccsid[] USED = "@(#)expr_sus.sl 1.28 (gritter) 5/29/05"; +static int sus = 1; +#else +static const char sccsid[] USED = "@(#)expr.sl 1.28 (gritter) 5/29/05"; +static int sus = 0; +#endif + +/* expression command */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <libgen.h> +#include <locale.h> +#include <wchar.h> +#include <unistd.h> +#include <limits.h> +#include <ctype.h> +#include <inttypes.h> + +#include "atoll.h" + +#define EQL(x,y) !strcmp(x,y) + +#define NUMSZ 25 + +static char **Av; +static int Ac; +static int Argi; +static int mb_cur_max; +static char *progname; +extern int sysv3; + +static char *Mstring[1]; + +int yylex(void); +static char *_rel(int op, register char *r1, register char *r2); +static char *_arith(int op, char *r1, char *r2); +static char *_conj(int op, char *r1, char *r2); +static char *match(char *s, char *p); +static int ematch(char *s, register char *p); +static void errxx(int c); +static int yyerror(const char *s); +static int numeric(const char *s); +static int chars(const char *s, const char *end); +static void *srealloc(void *, size_t); +static void *smalloc(size_t); +static char *numpr(int64_t val); + +static char *substr(char *, const char *, const char *); +static char *length(const char *); +static char *eindex(const char *, const char *); + +#if defined (SUS) || defined (SU3) || defined (S42) +#include <regex.h> +static int nbra; +#else /* !SUS, !SU3, !S42 */ +#include <regexpr.h> +#endif /* !SUS, !SU3, !S42 */ +%} + +/* Yacc productions 
for "expr" command: */ + +%union { + char *val; +} + +%token <val> OR AND ADD SUBT MULT DIV REM EQ GT GEQ LT LEQ NEQ +%token <val> A_STRING SUBSTR LENGTH INDEX NOARG MATCH + +%type <val> expr + +/* operators listed below in increasing precedence: */ +%left OR +%left AND +%left EQ LT GT GEQ LEQ NEQ +%left ADD SUBT +%left MULT DIV REM +%left MCH +%left MATCH +%left SUBSTR +%left LENGTH INDEX +%% + +/* a single `expression' is evaluated and printed: */ + +expression: expr NOARG { + if (sus && numeric($1)) { + int64_t n; + n = atoll($1); + /* int64_t need not be long long: cast to match %lld */ + printf("%lld\n", (long long)n); + exit(n == 0); + } else + puts($1); + exit((!strcmp($1,"0")||!strcmp($1,"\0"))? 1: 0); + } + ; + + +expr: '(' expr ')' { $$ = $2; } + | expr OR expr { $$ = _conj(OR, $1, $3); } + | expr AND expr { $$ = _conj(AND, $1, $3); } + | expr EQ expr { $$ = _rel(EQ, $1, $3); } + | expr GT expr { $$ = _rel(GT, $1, $3); } + | expr GEQ expr { $$ = _rel(GEQ, $1, $3); } + | expr LT expr { $$ = _rel(LT, $1, $3); } + | expr LEQ expr { $$ = _rel(LEQ, $1, $3); } + | expr NEQ expr { $$ = _rel(NEQ, $1, $3); } + | expr ADD expr { $$ = _arith(ADD, $1, $3); } + | expr SUBT expr { $$ = _arith(SUBT, $1, $3); } + | expr MULT expr { $$ = _arith(MULT, $1, $3); } + | expr DIV expr { $$ = _arith(DIV, $1, $3); } + | expr REM expr { $$ = _arith(REM, $1, $3); } + | expr MCH expr { $$ = match($1, $3); } + | MATCH expr expr { $$ = match($2, $3); } + | SUBSTR expr expr expr { $$ = substr($2, $3, $4); } + | LENGTH expr { $$ = length($2); } + | INDEX expr expr { $$ = eindex($2, $3); } + | A_STRING + ; +%% + +int +main(int argc, char **argv) +{ + extern int yyparse(void); + + Ac = argc; + Argi = 1; + Av = argv; + progname = basename(argv[0]); + if (getenv("SYSV3") != NULL) + sysv3 = 1; + setlocale(LC_COLLATE, ""); + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + if (Av[1] && Av[1][0] == '-' && Av[1][1] == '-' && Av[1][2] == '\0') + Argi++; + yyparse(); + /*NOTREACHED*/ + return 0; +} + +static const char *operators[] = { + "|", "&", "+", "-", 
"*", "/", "%", ":", + "=", "==", "<", "<=", ">", ">=", "!=", + "match", "substr", "length", "index", + "\0" +}; + +static int op[] = { + OR, AND, ADD, SUBT, MULT, DIV, REM, MCH, + EQ, EQ, LT, LEQ, GT, GEQ, NEQ, + MATCH, SUBSTR, LENGTH, INDEX +}; + +int +yylex(void) +{ + register char *p; + register int i; + + if(Argi >= Ac) return NOARG; + + p = Av[Argi++]; + + if((*p == '(' || *p == ')') && p[1] == '\0') + return (int)*p; + for(i = 0; *operators[i]; ++i) + if(EQL(operators[i], p)) + return op[i]; + + yylval.val = p; + return A_STRING; +} + +/* + * Evaluate the relational operator op on r1 and r2 and return "1" or "0". + * The operands are compared as integers when both are numeric, otherwise + * with strcoll(). + */ +static char * +_rel(int op, register char *r1, register char *r2) +{ + register int64_t i; + + if (numeric(r1) && numeric(r2)) { + int64_t a = atoll(r1), b = atoll(r2); + + /* compare instead of subtracting: a - b can overflow int64_t */ + i = (a > b) - (a < b); + } else + i = strcoll(r1, r2); + switch(op) { + case EQ: i = i==0; break; + case GT: i = i>0; break; + case GEQ: i = i>=0; break; + case LT: i = i<0; break; + case LEQ: i = i<=0; break; + case NEQ: i = i!=0; break; + } + return i? "1": "0"; +} + +/* + * Apply the arithmetic operator op to the numeric strings r1 and r2 and + * return the result formatted by numpr(). Non-numeric operands and a zero + * divisor for DIV or REM are reported through yyerror(). + */ +static char * +_arith(int op, char *r1, char *r2) +{ + int64_t i1, i2; + register char *rv; + + if (!numeric(r1) || !numeric(r2)) + yyerror("non-numeric argument"); + i1 = atoll(r1); + i2 = atoll(r2); + + switch(op) { + case ADD: i1 = i1 + i2; break; + case SUBT: i1 = i1 - i2; break; + case MULT: i1 = i1 * i2; break; + case DIV: + if (i2 == 0) yyerror("division by zero"); + i1 = i1 / i2; break; + case REM: + /* % by zero is as undefined as / by zero */ + if (i2 == 0) yyerror("division by zero"); + i1 = i1 % i2; break; + } + rv = numpr(i1); + return rv; +} + +static char * +_conj(int op, char *r1, char *r2) +{ + register char *rv = NULL; + + switch(op) { + + case OR: + if(EQL(r1, "0") + || EQL(r1, "")) + if(EQL(r2, "0") + || EQL(r2, "")) + rv = "0"; + else + rv = r2; + else + rv = r1; + break; + case AND: + if(EQL(r1, "0") + || EQL(r1, "")) + rv = "0"; + else if(EQL(r2, "0") + || EQL(r2, "")) + rv = "0"; + else + rv = r1; + break; + } + return rv; +} + +static char * +match(char *s, char *p) +{ + register char *rv; + int gotcha; + + gotcha = ematch(s, p); + if(nbra) { + if (gotcha) { + rv = smalloc(strlen(Mstring[0])+1); + 
strcpy(rv, Mstring[0]); + } else + rv = ""; + } else + rv = numpr(gotcha); + return rv; +} + +#if defined (SUS) || defined (SU3) || defined (S42) +static int +ematch(char *s, register char *p) +{ + regex_t re; + register int num; + regmatch_t bralist[2]; + int reflags = 0, val; + +#ifdef REG_ANGLES + reflags |= REG_ANGLES; +#endif +#if defined (SU3) && defined (REG_AVOIDNULL) + reflags |= REG_AVOIDNULL; +#endif + if ((num = regcomp(&re, p, reflags)) != 0) + errxx(0); + nbra = re.re_nsub; + if (regexec(&re, s, 2, bralist, 0) == 0 && bralist[0].rm_so == 0) { + if (re.re_nsub >= 1) { + num = bralist[1].rm_eo - bralist[1].rm_so; + Mstring[0] = srealloc(Mstring[0], num + 1); + strncpy(Mstring[0], s + bralist[1].rm_so, num); + Mstring[0][num] = '\0'; + } + val = chars(s, &s[bralist[0].rm_eo]); + } else + val = 0; + regfree(&re); + return val; +} +#else /* !SUS, !SU3, !S42 */ +static int +ematch(char *s, register char *p) +{ + char *expbuf; + register int num, val; + + if ((expbuf = compile(p, NULL, NULL)) == NULL) + errxx(regerrno); + if(nbra > 1) + yyerror("Too many '\\('s"); + if(advance(s, expbuf)) { + if(nbra == 1) { + p = braslist[0]; + num = braelist[0] ? braelist[0] - p : 0; + Mstring[0] = srealloc(Mstring[0], num + 1); + strncpy(Mstring[0], p, num); + Mstring[0][num] = '\0'; + } + val = chars(s, loc2); + } else + val = 0; + free(expbuf); + return(val); +} +#endif /* !SUS, !SU3, !S42 */ + +/*ARGSUSED*/ +static void +errxx(int c) +{ + yyerror("RE error"); +} + +static int +yyerror(const char *s) +{ + fprintf(stderr, "%s: %s\n", progname, s); + exit(2); +} + +static int +numeric(const char *s) +{ + if (*s == '-') + s++; + if (!isdigit(*s & 0377)) + return 0; + do + s++; + while (isdigit(*s & 0377)); + return (*s == '\0'); +} + +static int +chars(const char *s, const char *end) +{ + int count = 0, n; + wchar_t wc; + + if (mb_cur_max > 1) { + while (s < end) { + if ((n = mbtowc(&wc, s, MB_LEN_MAX)) >= 0) + count++; + s += n > 0 ? 
n : 1; + } + } else + count = end - s; + return count; +} + +static void * +srealloc(void *vp, size_t nbytes) +{ + void *p; + + if ((p = realloc(vp, nbytes)) == NULL) { + write(2, "no memory\n", 10); + exit(077); + } + return p; +} + +static void * +smalloc(size_t nbytes) +{ + return srealloc(NULL, nbytes); +} + +/* + * Format val as a decimal string in freshly allocated memory. + * A formatting error is reported through yyerror(). + */ +static char * +numpr(int64_t val) +{ + char *rv; + int ret; + + rv = smalloc(NUMSZ); + ret = snprintf(rv, NUMSZ, "%lld", (long long)val); + if (ret < 0) + yyerror("illegal number"); + if (ret >= NUMSZ) { + /* truncated: retry with room for ret characters plus the NUL */ + rv = srealloc(rv, ret + 1); + if (snprintf(rv, ret + 1, "%lld", (long long)val) < 0) + yyerror("illegal number"); + } + return rv; +} + +#define next(wc, s, n) (mb_cur_max > 1 && *(s) & 0200 ? \ + ((n) = mbtowc(&(wc), (s), mb_cur_max), \ + (n) = ((n) > 0 ? (n) : (n) < 0 ? illseq() : 1)) :\ + ((wc) = *(s) & 0377, (n) = 1)) + +static int +illseq(void) +{ + yyerror("illegal byte sequence"); + /*NOTREACHED*/ + return 0; +} + +/* + * Return the part of v that starts at character position s (counted from 1) + * and is at most w characters long. v is cut in place by storing a NUL + * after the selected characters. + */ +static char * +substr(char *v, const char *s, const char *w) +{ + long si, wi; + char *res; + wchar_t wc; + int n; + +#ifndef S42 + if (sysv3 == 0) + yyerror("syntax error"); +#endif + si = atoll(s); + wi = atoll(w); + /* "> 0" keeps a zero or negative index from counting down without end */ + while (--si > 0) + if (*v) { + next(wc, v, n); + v += n; + } + res = v; + /* likewise for a negative count */ + while (wi-- > 0) + if (*v) { + next(wc, v, n); + v += n; + } + *v = '\0'; + return res; +} + +static char * +length(const char *s) +{ + long i = 0; + char *rv; + wchar_t wc; + int n; + +#ifndef S42 + if (sysv3 == 0) + yyerror("syntax error"); +#endif + while (*s) { + next(wc, s, n); + s += n; + ++i; + } + rv = numpr(i); + return rv; +} + +static char * +eindex(const char *s, const char *t) +{ + long i, j, x; + char *rv; + wchar_t ws, wt; + int ns, nt; + +#ifndef S42 + if (sysv3 == 0) + yyerror("syntax error"); +#endif + for (i = 0, x = 0; s[i]; x++, i += ns) { + next(ws, &s[i], ns); + for (j = 0; t[j]; j += nt) { + next(wt, &t[j], nt); + if (ws == wt) { + rv = numpr(++x); + return rv; + } + } + } + return "0"; +} diff --git a/expr/mkfile b/expr/mkfile @@ -0,0 +1,10 @@ +BIN = expr 
+OBJ = expr.o +LOCAL_CFLAGS = -DSU3 +INSTALL_BIN = expr +INSTALL_MAN1 = expr.1 +CLEAN_FILES = expr.c +DEPS = yacc libcommon + +<$mkbuild/mk.default + diff --git a/find/find.1 b/find/find.1 @@ -0,0 +1,558 @@ +'\" t +.\" Sccsid @(#)find.1 1.44 (gritter) 8/14/05 +.\" Parts taken from find(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.TH FIND 1 "8/14/05" "Heirloom Toolchest" "User Commands" +.SH NAME +find \- find files +.SH SYNOPSIS +.B find +.I pathname-list expression +.SH DESCRIPTION +.I Find +recursively descends +the directory hierarchy for +each pathname in the +.I pathname-list +(i.\|e., one or more pathnames) +seeking files that match a boolean +.I expression +written in the primaries given below. +In the descriptions, the argument +.I n +is used as a decimal integer +where +.I +n +means more than +.I n, +.I \-n +means less than +.I n +and +.I n +means exactly +.IR n . +.TP 10n +.BR \-name " filename" +True if the +.I filename +argument matches the current file name. +Normal +Shell +argument syntax +as described in +.IR glob (7) +may be used if escaped (watch out for +`[', `?' and `*'). +The internationalization constructs +`[[:class:]]', `[[=e=]]', and `[[.cs.]]' +are understood with +.BR /usr/5bin/s42/find , +.BR /usr/5bin/posix/find , +and +.BR /usr/5bin/posix2001/find , +but not with +.BR /usr/5bin/find . +.TP +.BR \-perm " mode" +True if the file permission flags +exactly +match the +octal number +or symbolic +.I mode +(see +.IR chmod (1)). +If +.I mode +is prefixed by a minus sign, +the flags are compared: +.IR (flags&mode)==mode . +.TP +.BR \-type " c" +True if the type of the file +is +.I c, +where +.I c +is +.sp +.TS +lfB l. 
+b block special file; +c character special file; +d directory; +D Solaris door; +f plain file; +l symbolic link; +n HP-UX network special file; +p named pipe; +s socket. +.TE +.TP +.B \-follow +Always true; +causes find to follow symbolic links. +The `\fB\-type\fR l' condition never occurs in this case. +.TP +.BR \-links " n" +True if the file has +.I n +links. +.TP +.BR \-user " uname" +True if the file belongs to the user +.I uname +(login name or numeric user ID). +.TP +.BR \-group " gname" +True if the file belongs to group +.I gname +(group name or numeric group ID). +.TP +.BR \-size " n[" c ] +True if the file is +.I n +blocks long (512 bytes per block), +or, with +.BR c , +.I n +bytes long. +.TP +.BR \-inum " n" +True if the file has inode number +.I n. +.TP +.BR \-atime " n" +True if the file has been accessed in +.I n +days. +.TP +.BR \-mtime " n" +True if the file has been modified in +.I n +days. +.TP +.BR \-ctime " n" +True if the file inode has been changed in +.I n +days. +.TP +.BR \-exec " command ... " ; +True if the executed command returns +a zero value as exit status. +The end of the command must be punctuated by an (escaped) +semicolon. +A command argument `{}' is replaced by the +current pathname. +.TP +.BR \-exec " command ... " "{} +" +Always true. +The +.B {} +argument is replaced by a set of aggregated pathnames. +Each pathname is passed to the command as a single argument. +Every time a limit of arguments is reached +by the pathnames found so far, +the command is executed, +and aggregating starts using a new set +beginning with the next pathname. +If any invocation of command +returns a non-zero exit status, +find will return a non-zero exit status +when its processing is done. +.TP +.BR \-ok " command ... " ; +Like +.B \-exec +except that the generated command is written on +the standard output, then the standard input is read +and the command executed only upon response +.BR y . 
+.TP +.B \-print +Always true; +causes the current pathname to be printed. +If no expression is given, +.B \-print +is used by default +(as a change introduced by POSIX.2). +.TP +.BR \-newer " file" +True if +the current file has been modified more recently than the argument +.I file. +.TP +.BR \-anewer " file" +True if +the current file has been accessed more recently than the argument +.I file +was modified. +This primary is an extension. +.TP +.BR \-cnewer " file" +True if a status change +has occurred on the current file +more recently than the argument +.I file +was modified. +This primary is an extension. +.TP +.B \-depth +Always true; +causes the contents of each directory +to be examined before the directory itself. +.TP +.BR \-fstype " type" +True if the current file +resides on a file system of the given type. +.TP +.B \-local +True if the file is on a local file system. +All file system types except for +.I nfs +and +.I smbfs +are currently considered local. +.TP +.B \-mount +Always true; +restricts the search to directories +that have the same device id +as the currently examined path operand. +.TP +.B \-xdev +The same as +.BR \-mount . +This primary has been introduced by POSIX. +.TP +.B \-nouser +True if the file is owned by a user +that has no login name. +.TP +.B \-nogroup +True if the file is owned by a group +that lacks a group name. +.TP +.B \-prune +Always true. +Causes the search for the current path +to be stopped once the primary is evaluated. +When combined with +.BR \-depth , +.B \-prune +has no effect. +.TP +.BR \-cpio " device" +Always true. +Writes the file on the named device +in binary cpio format (5120-byte records). +Implies +.BR \-depth . +.TP +.BR \-ncpio " device" +Always true. +Writes the file on the named device +in SVR4 ASCII cpio format (5120-byte records). +Implies +.BR \-depth . 
+.PP +The primaries may be combined using the following operators +(in order of decreasing precedence): +.TP 4 +1) +A parenthesized group of primaries and operators +(parentheses are special to the Shell and must be escaped). +.TP 4 +2) +The negation of a primary +(`!' is the unary +.I not +operator). +.TP 4 +3) +Concatenation of primaries +(the +.I and +operation +is implied by the juxtaposition of two primaries +or by an explicit +.B \-a +operator). +.TP 4 +4) +Alternation of primaries +.RB "(`" \-o "' is the" +.I or +operator). +.PP +Options have been introduced by POSIX.1-2001 +in addition to the expression operators. +They must precede the +.I pathname-list +on the command line +and have no effect on boolean expression processing. +.TP +.B \-H +Follow symbolic links given on the command line, +but do not follow symbolic links encountered during directory traversal. +.TP +.B \-L +Follow all symbolic links found, +like the +.I \-follow +primary. +.PP +With the +.I \-follow +primary or the +.I \-L +option, hierarchy loops caused by symbolic links are detected, +but only +.B /usr/5bin/posix2001/find +prints an error message. +The offending link is not followed, +and operation continues with the next directory entry found. +.SH EXAMPLES +To remove all files named +`a.out' or `*.o' that have not been accessed for a week: +.IP "" .2i +find / \\( \-name a.out \-o \-name \'*.o\' \\) +\-atime +7 \-exec rm {} \\; +.PP +The rm command is executed once for each file. +The form +.IP "" .2i +find / \\( \-name a.out \-o \-name \'*.o\' \\) +\-atime +7 \-exec rm {} + +.PP +is faster since the rm command is executed with a set of pathnames. +.PP +To find all files below the directory `documents' +that contain the regular expression `string': +.IP "" .2i +find documents \-type f \-exec grep string {} + +.PP +To find all files in the directory `home', +not descending into its subdirectories: +.IP "" .2i +find home ! 
\-name home \-prune +.PP +To check whether the file `diary' +has been updated within the last two days; +the name of the file is printed if true, +and is not printed otherwise: +.IP "" .2i +find diary \-prune \-mtime \-2 +.SH FILES +/etc/passwd +.br +/etc/group +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_COLLATE +Affects the collation order for range expressions, +equivalence classes, and collation symbols in patterns with +.BR /usr/5bin/s42/find , +.BR /usr/5bin/posix/find , +and +.BR /usr/5bin/posix2001/find . +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +and character class expressions +in patterns. +.TP +.B SYSV3 +Causes the text of some diagnostic messages to be changed; +causes +.I \-ncpio +to create traditional ASCII cpio format archives. +.SH "SEE ALSO" +chmod(1), +cpio(1), +pax(1), +sh(1), +xargs(1), +stat(2), +glob(7), +locale(7) +.SH NOTES +Undesired effects can result if file names printed by +.I find +contain newline characters, +as shown by the following command sequence: +.RS +.sp +.nf +$ mkdir \-p \'dummy +> /etc\' +$ touch \'dummy +> /etc/passwd\' +$ find . \-print +\&. +\&./dummy +.sp +\&./dummy +/etc +\&./dummy +/etc/passwd +$\ +.fi +.sp +.RE +Shell scripts or utilities unaware of this problem +will operate on +.I /etc/passwd +(or other arbitrary file names) +when reading such output; +a malicious user might create such files +to read or overwrite privileged information. +To circumvent this problem, +one of the following proposals should be taken +unless the file hierarchy traversed by the +.I find +command is definitively known not to contain such file names: +.IP \(en 2 +If the output is read by the +.I xargs +utility to gain faster execution by aggregating command arguments as in +.in +2 +.sp +find . \-print | xargs \fIcommand\fR +.sp +.in -2 +a safe and equally fast substitute is the +.in +2 +.sp +find . 
\-exec \fIcommand\fR {} + +.sp +.in -2 +operand of +.IR find ; +it is not portably accepted by +.I find +implementations, though. +.IP \(en 2 +A universal solution for submitting file names to the +.I xargs +utility is given in the +.I NOTES +section of +.IR xargs (1). +.IP \(en 2 +The method employed by this script can be generalized as follows: +If the script or utility reading the output of +.I find +provides the necessary parsing capabilities, +special path prefixes can be given to the +.I find +command, such as +.in +2 +.sp +find /.//. \-print +.sp +.in -2 +for absolute path names or +.in +2 +.sp +find .//. \-print +.sp +.in -2 +for relative path names. +Since adjacent slash characters never appear +in relative file names found during directory traversal, +they can be taken as delimiters; +a line starts a new path name +only if the delimiter appears. +.IP \(en 2 +The +.I \-name +operand can be used to exclude all path names +that contain newline characters, as in +.in +2 +.sp +.nf +$ find . \-name \'* +> *\' \-prune \-o ! \-name \'* +> *\' \-print +.sp +.fi +.in -2 +Note that certain other implementations of +.I find +require a leading period in the pattern +to match file names with a leading period; +it may be necessary to exclude such patterns as well. +.IP \(en 2 +The +.I \-depth +operand cannot be combined with the +.I \-prune +operand used in the previous example. +When the directory name must be printed +after file names below that directory, +as with the +.IR cpio +command, +file names that leave the specified path hierarchy +should be filtered out: +.in +2 +.sp +find . \-depth | egrep \'^\e./\' | cpio \-oc \-O /dev/rmt/c0s0 +.sp +.in -2 +(note the escaped regular expression meta-character). +.IP \(en 2 +The +.I \-cpio +and +.I \-ncpio +operands will automatically exclude file names +that contain newline characters +with this +.I find +implementation. 
+.PP +The +.I \-print0 +operand supported by some other implementations +is considered a very limited work-around +since it does not allow the output to be processed +by utilities unaware of NUL characters; +it has therefore not been included here. diff --git a/find/find.c b/find/find.c @@ -0,0 +1,1554 @@ +/* find COMPILE: cc -o find -s -O -i find.c -lS */ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, September 2003. + */ +/* from Unix 7th Edition /usr/src/cmd/find.c */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +#if defined (SU3) +static const char sccsid[] USED = "@(#)find_su3.sl 1.45 (gritter) 5/8/06"; +#elif defined (SUS) +static const char sccsid[] USED = "@(#)find_sus.sl 1.45 (gritter) 5/8/06"; +#else +static const char sccsid[] USED = "@(#)find.sl 1.45 (gritter) 5/8/06"; +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <unistd.h> +#include <pwd.h> +#include <time.h> +#include <grp.h> +#include <stdarg.h> +#include <libgen.h> +#include <errno.h> +#include <locale.h> +#include <signal.h> +#if defined (SUS) || defined (SU3) +#include <fnmatch.h> +#endif +#if defined (__linux__) || defined (_AIX) || defined (__hpux) +#include <mntent.h> +#endif +#if defined (__FreeBSD__) || defined (__NetBSD__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) +#include <sys/param.h> +#include <sys/mount.h> +#endif +#ifdef _AIX +#include <sys/sysmacros.h> +#endif +#ifndef major +#include <sys/mkdev.h> +#endif +#if __NetBSD_Version__>= 300000000 +#include <sys/statvfs.h> +#define statfs statvfs +#endif +#include "getdir.h" +#include "atoll.h" +#define A_DAY 86400L /* a day full of seconds */ +#define EQ(x, y) (strcmp(x, y)==0) + +#ifndef 
MNTTYPE_IGNORE +#define MNTTYPE_IGNORE "" +#endif + +#ifndef S_IFDOOR +#define S_IFDOOR 0xD000 +#endif + +#ifndef S_IFNWK +#define S_IFNWK 0x9000 +#endif + +#undef ctime +#define ctime find_ctime + +static char *Pathname; + +struct aggregate { /* for exec ... {} + */ + long a_cnt; /* count of arguments */ + long a_cur; /* current position in aggregate */ + long a_csz; /* aggregate current length */ + long a_msz; /* aggregate maximum length */ + char **a_vec; /* arguments */ + char *a_spc; /* aggregate space */ + long a_maxarg; /* maximum arguments in e_vec */ +}; + +struct anode { + int (*F)(struct anode *); + union anode_l { + struct anode *L; + char *pat; + time_t t; + uid_t u; + gid_t g; + ino_t i; + nlink_t link; + off_t sz; + mode_t per; + int com; + FILE *fp; + char *fstype; + } l; + union anode_r { + struct anode *R; + int s; + pid_t pid; + struct aggregate *a; + } r; +}; +static char *Fname; +static time_t Now; +static int Argc, + Ai, + Pi; +static char **Argv; +/* cpio stuff */ +static int Cpio; + +static struct stat Statb; + +/* + * Keep track of all visited directories, to avoid loops caused by + * symbolic links and to free storage and close files after fork(). + */ +static struct visit { + struct getdb *v_db; /* getdb struct for this level */ + ino_t v_ino; /* inode number */ + int v_fd; /* file descriptor */ + dev_t v_dev; /* device id */ +} *visited; +static int vismax; /* number of members in visited */ + +/* + * For -fstype, keep track of all filesystem types known to the system. If + * we had st_fstype in struct stat as SVR4 does, this would be far more + * reliable. 
+ */ +#if defined (__linux__) || defined (_AIX) || defined (__hpux) +static struct fstype { + dev_t fsdev; /* device id of filesystem */ + char *fstype; /* filesystem type */ +} *fstypes, *fscur; +#endif /* __linux__ || _AIX || __hpux */ + +static int Home = -1; +static int wanthome; +static mode_t um; /* user's umask */ +static const char *progname; +static int status; /* exit status */ +static int depth; /* -depth flag */ +static int Print = 1; /* implicit -print */ +static int Prune; /* -prune at this point */ +static int Mount; /* -mount, -xdev */ +static int Execplus; /* have a -exec command {} + node */ +static int HLflag; /* -H or -L option given */ +static char *Statfs; /* result of statfs() on FreeBSD */ +static int incomplete; /* encountered an incomplete statement */ +extern int sysv3; + +static int (*statfn)(const char *, struct stat *) = lstat; + +static struct anode *expr(void); +static struct anode *e1(void); +static struct anode *e2(void); +static struct anode *e3(void); +static struct anode *mk(struct anode *); +static void oper(const char **); +static char *nxtarg(int); +static int and(struct anode *); +static int or(struct anode *); +static int not(struct anode *); +static int glob(struct anode *); +static int print(struct anode *); +static int prune(struct anode *); +static int null(struct anode *); +static int mtime(struct anode *); +static int atime(struct anode *); +static int ctime(struct anode *); +static int user(struct anode *); +static int ino(struct anode *); +static int group(struct anode *); +static int nogroup(struct anode *); +static int nouser(struct anode *); +static int links(struct anode *); +static int size(struct anode *); +static int sizec(struct anode *); +static int perm(struct anode *); +static int type(struct anode *); +static int exeq(struct anode *); +static int ok(struct anode *); +static int cpio(struct anode *); +static int newer(struct anode *); +static int cnewer(struct anode *); +static int anewer(struct anode *); 
+static int fstype(struct anode *); +static int local(struct anode *); +static int scomp(long long, long long, char); +static int doex(int, struct aggregate *); +static struct aggregate *mkagg(long); +static uid_t getunum(const char *); +static gid_t getgnum(const char *); +static const char *getuser(uid_t); +static const char *getgroup(gid_t); +#if defined (__linux__) || defined (_AIX) || defined (__hpux) +static void getfscur(dev_t); +static void getfstypes(void); +#endif /* __linux__ || _AIX || __hpux */ +static int descend(char *, struct anode *, int); +static int descend1(char *, struct anode *, int); +static int descend2(char *, struct anode *, int); +static void setpath(char *, const char *, int); +static void pr(const char *, ...); +static void er(const char *, ...); +static void usage(void); +static void *srealloc(void *, size_t); +static void mkcpio(struct anode *, const char *, int); +static void trailer(struct anode *, int); +static void mknewer(struct anode *, const char *, int (*)(struct anode *)); +static mode_t newmode(const char *ms, const mode_t pm); + +int +main(int argc, char **argv) +{ + struct anode *exlist; + struct anode nlist = { null, { 0 }, { 0 } }; + int paths; + register char *sp = 0; + int i, j; + + time(&Now); + umask(um = umask(0)); + progname = basename(argv[0]); + setlocale(LC_COLLATE, ""); + setlocale(LC_CTYPE, ""); + if (getenv("SYSV3") != NULL) + sysv3 = 1; + for (i = 1; i < argc; i++) { + if (argv[i][0] != '-' || argv[i][1] == '\0') + break; + if (argv[i][1] == '-') { + i++; + break; + } + for (j = 1; argv[i][j]; j++) + if (argv[i][j] != 'H' && argv[i][j] != 'L') + goto brk; + for (j = 1; argv[i][j]; j++) + HLflag = argv[i][j]; + } +brk: if (HLflag == 'L') + statfn = stat; + argc -= i - 1; + argv += i - 1; + Argc = argc; Argv = argv; + if(argc<2) { + pr("insufficient number of arguments"); + usage(); + } + for(Ai = paths = 1; Ai < argc; ++Ai, ++paths) + if(*Argv[Ai] == '-' || EQ(Argv[Ai], "(") || EQ(Argv[Ai], "!")) + break; + 
if(paths == 1) /* no path-list */ + usage(); + if(Ai<argc) { + if(!(exlist = expr())) /* parse and compile the arguments */ + er("find: parsing error"); + if(Ai<argc) { + pr("bad option %s", argv[Ai]); + usage(); + } + } else + exlist = &nlist; + if (paths > 2) + wanthome = 1; + if (wanthome && (Home = open(".", O_RDONLY)) < 0) + er("bad starting directory"); + for(Pi = 1; Pi < paths; ++Pi) { + if (Pi > 1 && Home >= 0 && fchdir(Home) < 0) + er("bad starting directory"); + setpath(Pathname, Argv[Pi], 0); + Fname = sp = Pathname; + do + if (sp[0] == '/') + Fname = &sp[1]; + while (*sp++); + descend(Pathname, exlist, 0); /* to find files that match */ + } + if(Cpio || Execplus) + trailer(exlist, 1); + exit(status); +} + +/* compile time functions: priority is expr()<e1()<e2()<e3() */ + +/*ARGSUSED*/ +static struct anode *expr(void) { /* parse ALTERNATION (-o) */ + register struct anode * p1; + struct anode n = { 0, { 0 }, { 0 } }; + + p1 = e1() /* get left operand */ ; + if(EQ(nxtarg(0), "-o")) { + const char *ops[] = { "-o", "-a", 0 }; + oper(ops); + n.F = or, n.l.L = p1, n.r.R = expr(); + return(mk(&n)); + } + else if(Ai <= Argc) --Ai; + return(p1); +} +static struct anode *e1(void) { /* parse CONCATENATION (formerly -a) */ + register struct anode * p1; + register char *a; + struct anode n = { 0, { 0 }, { 0 } }; + + p1 = e2(); + a = nxtarg(0); + if(EQ(a, "-a")) { + const char *ops[] = { "-o", "-a", 0 }; + oper(ops); +And: + n.F = and, n.l.L = p1, n.r.R = e1(); + return(mk(&n)); + } else if(EQ(a, "(") || EQ(a, "!") || (*a=='-' && !EQ(a, "-o"))) { + --Ai; + goto And; + } else if(Ai <= Argc) --Ai; + return(p1); +} +static struct anode *e2(void) { /* parse NOT (!) 
*/ + struct anode n = { 0, { 0 }, { 0 } }; + if(EQ(nxtarg(0), "!")) { + const char *ops[] = { "-o", "-a", "!", 0 }; + oper(ops); + n.F = not, n.l.L = e3(); + return(mk(&n)); + } + else if(Ai <= Argc) --Ai; + return(e3()); +} +static struct anode *e3(void) { /* parse parens and predicates */ + struct anode *p1; + struct anode n = { 0, { 0 }, { 0 } }; + long i, k; + register char *a, *b, s, *p, *q; + + a = nxtarg(0); + if(EQ(a, "(")) { + const char *ops[] = { "-o", "-a", 0 }; + oper(ops); + p1 = expr(); + a = nxtarg(1); + if(!EQ(a, ")")) goto err; + return(p1); + } + else if(EQ(a, "-depth")) { + depth = 1; + n.F = null; + } else if(EQ(a, "-follow")) { + statfn = stat; + n.F = null; + } else if(EQ(a, "-mount") || EQ(a, "-xdev")) { + Mount = 1; + n.F = null; + } else if(EQ(a, "-print")) { + Print = 0; + n.F = print; + } else if(EQ(a, "-prune")) + n.F = prune; + else if(EQ(a, "-nogroup")) + n.F = nogroup; + else if(EQ(a, "-nouser")) + n.F = nouser; + else if(EQ(a, "-local")) { +#if defined (__linux__) || defined (_AIX) || defined (__hpux) + getfstypes(); +#endif /* __linux__ || _AIX || __hpux */ + n.F = local; + Statfs = a; + } + if (n.F) + return mk(&n); + b = nxtarg(2); + s = *b; + /*if(s=='+') b++;*/ + if(EQ(a, "-name")) + n.F = glob, n.l.pat = b; + else if(EQ(a, "-mtime")) + n.F = mtime, n.l.t = atol(b), n.r.s = s; + else if(EQ(a, "-atime")) + n.F = atime, n.l.t = atol(b), n.r.s = s; + else if(EQ(a, "-ctime")) + n.F = ctime, n.l.t = atol(b), n.r.s = s; + else if(EQ(a, "-user")) + n.F = user, n.l.u = getunum(b), n.r.s = s; + else if(EQ(a, "-inum")) + n.F = ino, n.l.i = atoll(b), n.r.s = s; + else if(EQ(a, "-group")) + n.F = group, n.l.g = getgnum(b), n.r.s = s; + else if(EQ(a, "-size")) { + n.l.sz = atoll(b), n.r.s = s; + while (b[0] && b[1]) + b++; + if (b[0] == 'c') + n.F = sizec; + else + n.F = size; + } + else if(EQ(a, "-links")) + n.F = links, n.l.link = atol(b), n.r.s = s; + else if(EQ(a, "-perm")) { + while (*b == '-') + b++; + n.F = perm, n.l.per = newmode(b, 
0), n.r.s = s; +#if defined (SUS) || defined (SU3) + if (s == '-') + n.l.per &= 07777; +#endif + } + else if(EQ(a, "-type")) { + i = b[0] == '-' || b[0] == '+' ? b[1] : b[0]; + i = i=='d' ? S_IFDIR : + i=='b' ? S_IFBLK : + i=='c' ? S_IFCHR : + i=='D' ? S_IFDOOR : + i=='f' ? S_IFREG : + i=='l' ? S_IFLNK : + i=='n' ? S_IFNWK : + i=='p' ? S_IFIFO : + i=='s' ? S_IFSOCK : + 0; + n.F = type, n.l.per = i; + } + else if (EQ(a, "-exec")) { + Print = 0; + wanthome = 1; + i = Ai - 1; + q = ""; + k = 0; + while(!EQ(p = nxtarg(1), ";")) { + if (EQ(p, "+") && EQ(q, "{}")) { + n.r.a = mkagg(k); + break; + } + q = p; + k += strlen(p) + 1; + } + n.F = exeq, n.l.com = i; + } + else if (EQ(a, "-ok")) { + Print = 0; + wanthome = 1; + i = Ai - 1; + while(!EQ(p = nxtarg(1), ";")); + n.F = ok, n.l.com = i; + } + else if(EQ(a, "-cpio")) + mkcpio(&n, b, 0); + else if(EQ(a, "-ncpio")) + mkcpio(&n, b, 1); + else if(EQ(a, "-newer")) + mknewer(&n, b, newer); + else if(EQ(a, "-anewer")) + mknewer(&n, b, anewer); + else if(EQ(a, "-cnewer")) + mknewer(&n, b, cnewer); + else if(EQ(a, "-fstype")) { +#if defined (__linux__) || defined (_AIX) || defined (__hpux) + getfstypes(); +#endif /* __linux__ || _AIX || __hpux */ + n.F = fstype, n.l.fstype = b; + Statfs = a; + } + if (n.F) { + if (incomplete) + nxtarg(1); + return mk(&n); + } +err: pr("bad option %s", a); + usage(); + /*NOTREACHED*/ + return 0; +} +static struct anode *mk(struct anode *p) +{ + struct anode *n; + + n = srealloc(NULL, sizeof *n); + *n = *p; + return(n); +} +static void oper(const char **ops) +{ + char *a; + + a = nxtarg(-1); + while (*ops) + if (EQ(a, *ops++)) + er("operand follows operand"); + Ai--; +} + +static char *nxtarg(int must) { /* get next arg from command line */ + static int strikes = 0; + + if(must==1 && Ai>=Argc || strikes==3) + er("incomplete statement"); + if(Ai>=Argc) { + if (must >= 0) + strikes++; + incomplete = 1; + Ai = Argc + 1; + return(""); + } + return(Argv[Ai++]); +} + +/* execution time functions */ 
+static int and(register struct anode *p) +{ + return(((*p->l.L->F)(p->l.L)) && ((*p->r.R->F)(p->r.R))?1:0); +} +static int or(register struct anode *p) +{ + return(((*p->l.L->F)(p->l.L)) || ((*p->r.R->F)(p->r.R))?1:0); +} +static int not(register struct anode *p) +{ + return( !((*p->l.L->F)(p->l.L))); +} +#if !defined (SUS) && !defined (SU3) +static int glob(register struct anode *p) +{ + extern int gmatch(const char *, const char *); + return(gmatch(Fname, p->l.pat)); +} +#else /* SUS, SU3 */ +static int glob(register struct anode *p) +{ + int val; +#ifdef __GLIBC__ + /* avoid glibc's broken [^...] */ + extern char **environ; + char **savenv = environ; + char *newenv[] = { "POSIXLY_CORRECT=", NULL }; + environ = newenv; +#endif /* __GLIBC__ */ + val = fnmatch(p->l.pat, Fname, FNM_PATHNAME) == 0; +#ifdef __GLIBC__ + environ = savenv; +#endif /* __GLIBC__ */ + return val; +} +#endif /* SUS, SU3 */ +/*ARGSUSED*/ +static int print(register struct anode *p) +{ + puts(Pathname); + return(1); +} +/*ARGSUSED*/ +static int prune(register struct anode *p) +{ + if (!depth) + Prune = 1; + return(1); +} +/*ARGSUSED*/ +static int null(register struct anode *p) +{ + return(1); +} +static int mtime(register struct anode *p) +{ + return(scomp((Now - Statb.st_mtime) / A_DAY, p->l.t, p->r.s)); +} +static int atime(register struct anode *p) +{ + return(scomp((Now - Statb.st_atime) / A_DAY, p->l.t, p->r.s)); +} +static int ctime(register struct anode *p) +{ + return(scomp((Now - Statb.st_ctime) / A_DAY, p->l.t, p->r.s)); +} +static int user(register struct anode *p) +{ + return(scomp(Statb.st_uid, p->l.u, p->r.s)); +} +/* -inum: e3() stores the operand in l.i (ino_t), so compare that member, not l.u */ +static int ino(register struct anode *p) +{ + return(scomp(Statb.st_ino, p->l.i, p->r.s)); +} +/* -group: e3() stores the operand in l.g (gid_t); the sign kept in r.s is not consulted */ +static int group(register struct anode *p) +{ + return(p->l.g == Statb.st_gid); +} +static int nogroup(register struct anode *p) +{ + return(getgroup(Statb.st_gid) == NULL); +} +static int nouser(register struct anode *p) +{ + return(getuser(Statb.st_uid) == NULL); +} +static
int links(register struct anode *p) +{ + return(scomp(Statb.st_nlink, p->l.link, p->r.s)); +} +static int size(register struct anode *p) +{ + return(scomp(Statb.st_size?(Statb.st_size+511)>>9:0, p->l.sz, p->r.s)); +} +static int sizec(register struct anode *p) +{ + return(scomp(Statb.st_size, p->l.sz, p->r.s)); +} +static int perm(register struct anode *p) +{ + register int i; + i = (p->r.s=='-') ? p->l.per : 07777; /* '-' means only arg bits */ + return((Statb.st_mode & i & 07777) == p->l.per); +} +static int type(register struct anode *p) +{ + return((Statb.st_mode&S_IFMT)==p->l.per); +} +static int exeq(register struct anode *p) +{ + if (p->r.a) { + if (Pathname) { + size_t sz = strlen(Pathname) + 1; + if (p->r.a->a_csz + sz <= p->r.a->a_msz && + p->r.a->a_cur < p->r.a->a_maxarg-1) { + strcpy(p->r.a->a_vec[p->r.a->a_cur++] = + &p->r.a->a_spc[p->r.a->a_csz], + Pathname); + p->r.a->a_csz += sz; + return 1; + } else { + if (p->r.a->a_cur == 0) { + p->r.a->a_vec[p->r.a->a_cur++] = + Pathname; + p->r.a->a_vec[p->r.a->a_cur] = NULL; + } + else { + p->r.a->a_vec[p->r.a->a_cur] = NULL; + fflush(stdout); + doex(p->l.com, p->r.a); + return exeq(p); + } + } + } else { + if (p->r.a->a_cur == 0) + return 1; + p->r.a->a_vec[p->r.a->a_cur] = NULL; + } + } + fflush(stdout); /* to flush possible `-print' */ + return(doex(p->l.com, p->r.a)); +} +static int ok(struct anode *p) +{ + char c; int yes; + yes = 0; + fflush(stdout); /* to flush possible `-print' */ + fprintf(stderr, "< %s ... %s >? 
", Argv[p->l.com], Pathname); + if (read(0, &c, 1) != 1) + exit(2); + yes = c == 'y'; + if (c != '\n') + while (read(0, &c, 1) == 1 && c != '\n'); + if(yes) return(doex(p->l.com, 0)); + return(0); +} + +static int cpio(struct anode *p) +{ + if (strchr(Pathname, '\n')) { + pr("file name \"%s\" contains a newline character; " + "file not archived", Pathname); + status |= 1; + } else + fprintf(p->l.fp, "%s\n", Pathname); + return(1); +} +static int newer(register struct anode *p) +{ + return Statb.st_mtime > p->l.t; +} +static int anewer(register struct anode *p) +{ + return Statb.st_atime > p->l.t; +} +static int cnewer(register struct anode *p) +{ + return Statb.st_ctime > p->l.t; +} +static int fstype(register struct anode *p) +{ +#if defined (__linux__) || defined (_AIX) || defined (__hpux) + return(EQ(fscur->fstype, p->l.fstype)); +#elif defined (__FreeBSD__) || defined (__NetBSD__) || defined (__OpenBSD__) \ + || defined (__DragonFly__) || defined (__APPLE__) + return(EQ(Statfs, p->l.fstype)); +#else + return(EQ(Statb.st_fstype, p->l.fstype)); +#endif +} +static int local(register struct anode *p) +{ +#if defined (__linux__) || defined (_AIX) || defined (__hpux) + return(strcmp(fscur->fstype, "nfs") && strcmp(fscur->fstype, "smbfs")); +#elif defined (__FreeBSD__) || defined (__NetBSD__) || defined (__OpenBSD__) \ + || defined (__DragonFly__) || defined (__APPLE__) + return(strcmp(Statfs, "nfs") != 0); +#else + return(strcmp(Statb.st_fstype, "nfs") != 0); +#endif +} + +/* support functions */ +/* funny signed compare */ +static int scomp(register long long a, register long long b, register char s) +{ + if(s == '+') + return(a > b); + if(s == '-') + return(a < (b * -1)); + return(a == b); +} + +static int +doex(int com, struct aggregate *a) +{ + register int np; + register char *na; + char **oargv; + int oargc; + static char **nargv; + static int narga; + static int ccode; + pid_t pid; + + ccode = np = 0; + oargv = Argv; + oargc = com; + while (na=oargv[oargc++]) 
{ + if (np >= narga-1) + nargv = srealloc(nargv, (narga+=20) * sizeof *nargv); + if(strcmp(na, ";")==0 && oargv == Argv) break; + if(strcmp(na, "{}")==0 && oargv == Argv) { + if (a) { + oargv = a->a_vec; + oargc = 0; + } else + nargv[np++] = Pathname; + } + else nargv[np++] = na; + } + if (a) { + a->a_cur = 0; + a->a_csz = 0; + } + if (np==0) return(9); + nargv[np] = 0; + if(pid = fork()) /*parent*/ while (wait(&ccode) != pid); + else { /*child*/ + if (fchdir(Home) < 0) { + pr("bad starting directory"); + _exit(1); + } + execvp(nargv[0], nargv); + _exit(1); + } + if (a && ccode) { + if (WIFSIGNALED(ccode)) + status |= WTERMSIG(ccode) | 0200; + else if (WIFEXITED(ccode)) + status |= WEXITSTATUS(ccode); + } + return(ccode && a==NULL ? 0:1); +} + +static struct aggregate *mkagg(long baselen) +{ + static size_t envsz; + extern char **environ; + register int i; + struct aggregate *a; + + a = srealloc(NULL, sizeof *a); + if (envsz == 0) + for (i = 0; environ[i]; i++) + envsz += strlen(environ[i]) + 1; + a->a_msz = sysconf(_SC_ARG_MAX) - baselen - envsz - 2048; + a->a_spc = srealloc(NULL, a->a_msz); + a->a_maxarg = 8192; + a->a_vec = srealloc(NULL, a->a_maxarg * sizeof *a->a_vec); + a->a_csz = 0; + a->a_cur = 0; + Execplus = 1; + return a; +} + +static uid_t getunum(const char *s) { /* find user name and return number */ + struct passwd *pwd; + char *x; + uid_t u; + + if ((pwd = getpwnam(s)) != NULL) + return pwd->pw_uid; + u = strtol(s, &x, 10); + if (*x == '\0') + return u; + er("cannot find %s name", s); + /*NOTREACHED*/ + return 0; +} + +static gid_t getgnum(const char *s) { /* find group name and return number */ + struct group *grp; + char *x; + gid_t g; + + if ((grp = getgrnam(s)) != NULL) + return grp->gr_gid; + g = strtol(s, &x, 10); + if (*x == '\0') + return g; + er("cannot find %s name", s); + /*NOTREACHED*/ + return 0; +} + +#define CACHESIZE 16 + +static const char *getuser(uid_t uid) +{ + static struct { + char *name; + uid_t uid; + } cache[CACHESIZE]; + 
static int last; + int i; + struct passwd *pwd; + const char *name; + + for (i = 0; i < CACHESIZE && cache[i].name; i++) + if (cache[i].uid == uid) + goto found; + if ((pwd = getpwuid(uid)) != NULL) + name = pwd->pw_name; + else + name = ""; + if (i >= CACHESIZE) { + if (last >= CACHESIZE) + last = 0; + i = last++; + } + if (cache[i].name) + free(cache[i].name); + cache[i].name = strdup(name); + cache[i].uid = uid; +found: return cache[i].name[0] ? cache[i].name : NULL; +} + +static const char *getgroup(gid_t gid) +{ + static struct { + char *name; + gid_t gid; + } cache[CACHESIZE]; + static int last; + int i; + struct group *grp; + const char *name; + + for (i = 0; i < CACHESIZE && cache[i].name; i++) + if (cache[i].gid == gid) + goto found; + if ((grp = getgrgid(gid)) != NULL) + name = grp->gr_name; + else + name = ""; + if (i >= CACHESIZE) { + if (last >= CACHESIZE) + last = 0; + i = last++; + } + if (cache[i].name) + free(cache[i].name); + cache[i].name = strdup(name); + cache[i].gid = gid; +found: return cache[i].name[0] ? 
cache[i].name : NULL; +} + +#if defined (__linux__) || defined (_AIX) || defined (__hpux) +static void getfscur(dev_t dev) +{ + int i; + + for (i = 0; fstypes[i].fstype; i++) + if (fstypes[i].fsdev == dev) { + fscur = &fstypes[i]; + return; + } + er("filesystem type for %s unknown", Pathname); +} + +static void getfstypes(void) +{ + struct stat st; + FILE *fp; + struct mntent *mp; +#ifdef __hpux + const char mtab[] = "/etc/mnttab"; +#else /* __linux__, _AIX */ + const char mtab[] = "/etc/mtab"; +#endif /* __linux__, _AIX */ + int i = 0; + + if (fstypes) + return; + if ((fp = setmntent(mtab, "r")) == NULL) + er("cannot open %s: %s", mtab, strerror(errno)); + while ((mp = getmntent(fp)) != NULL) { + if (EQ(mp->mnt_type, MNTTYPE_IGNORE)) + continue; + if (stat(mp->mnt_dir, &st) < 0) + continue; + fstypes = srealloc(fstypes, (i+1) * sizeof *fstypes); + fstypes[i].fsdev = st.st_dev; + fstypes[i].fstype = strdup(mp->mnt_type); + i++; + } + endmntent(fp); +} +#endif /* __linux__ || _AIX || __hpux */ + +/* + * First part of descend, called for any file found. 
+ */ +static int descend(char *fname, struct anode *exlist, int level) +{ + struct stat ost; + register char *c1; + int i; + int rv = 0; + + if(statfn(fname, &Statb)<0) { + if (statfn != lstat && lstat(fname, &Statb) == 0) + nof: c1 = "cannot follow symbolic link %s: %s"; + else if (sysv3) + c1 = "stat() failed: %s: %s"; + else if (errno == ENOENT || errno == ENOTDIR) + c1 = "cannot open %s: %s"; + else + c1 = "stat() error %s: %s"; + pr(c1, Pathname, strerror(errno)); + status = 18; + return(0); + } + if (level == 0 && HLflag == 'H' && (Statb.st_mode&S_IFMT) == S_IFLNK) { + struct stat nst; + if (stat(fname, &nst) == 0) + Statb = nst; + else if (errno == ELOOP) + goto nof; + } +#if defined (__FreeBSD__) || defined (__NetBSD__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) + if (Statfs != NULL) { + static struct statfs sf; + if (statfs(fname, &sf) < 0) { + pr("statfs() error %s: %s", Pathname, strerror(errno)); + status = 18; + return(0); + } + Statfs = sf.f_fstypename; + } +#endif /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ + if (Mount) { + static dev_t curdev; + if (level == 0) + curdev = Statb.st_dev; + else if (curdev != Statb.st_dev) + return(0); + } + Prune = 0; + if (!depth) { +#if defined (__linux__) || defined (_AIX) || defined (__hpux) + if (fstypes) + getfscur(Statb.st_dev); +#endif /* __linux__ || _AIX || __hpux */ + if((*exlist->F)(exlist) && Print) + puts(Pathname); + } else + ost = Statb; + if(Prune || (Statb.st_mode&S_IFMT)!=S_IFDIR) + goto reg; + if (statfn != lstat) { + for (i = 0; i < level; i++) + if (Statb.st_dev == visited[i].v_dev && + Statb.st_ino == visited[i].v_ino) { +#ifdef SU3 + pr("Symbolic link loop at %s", Pathname); + status = 18; +#endif /* SU3 */ + goto reg; + } + } + if (level >= vismax) { + vismax += 20; + visited = srealloc(visited, sizeof *visited * vismax); + } + visited[level].v_dev = Statb.st_dev; + visited[level].v_ino = Statb.st_ino; + + rv = descend1(fname, 
exlist, level); + +reg: + if (depth) { + Statb = ost; +#if defined (__linux__) || defined (_AIX) || defined (__hpux) + if (fstypes) + getfscur(Statb.st_dev); +#endif /* __linux__ || _AIX || __hpux */ + if ((*exlist->F)(exlist) && Print) + puts(Pathname); + } + return(rv); +} + +/* + * Second part of descend, called for any directory found. + */ +static int descend1(char *fname, struct anode *exlist, int level) +{ + int dir = 0; /* open directory */ + register char *c1; + struct getdb *db; + register struct direc *dp; + int endofname; + int err; + int oflags = O_RDONLY; + +#ifdef O_DIRECTORY + oflags |= O_DIRECTORY; +#endif +#ifdef O_NOFOLLOW + if (statfn == lstat && (HLflag != 'H' || level > 0)) + oflags |= O_NOFOLLOW; +#endif + if ((dir = open(fname, oflags)) < 0 || + fcntl(dir, F_SETFD, FD_CLOEXEC) < 0 || + fchdir(dir) < 0) { + if (dir >= 0) + close(dir); + else if (errno == EMFILE && descend2(fname, exlist, level)) + /* + * A possible performance improvement would be to + * call descend2() in the directory above, since + * the current method involves one fork() call per + * subdirectory at this level. The condition occurs + * so rarely that it seems hardly worth optimization + * though. + */ + return 0; + pr("cannot open %s: %s", Pathname, strerror(errno)); + status = 18; + return 0; + } + if ((db = getdb_alloc(Pathname, dir)) == NULL) { + write(2, "no memory\n", 10); + _exit(077); + } + visited[level].v_db = db; + visited[level].v_fd = dir; + for(c1 = Pathname; *c1; ++c1); + if(*(c1-1) == '/') + --c1; + endofname = c1 - Pathname; + + while ((dp = getdir(db, &err)) != NULL) { + if((dp->d_name[0]=='.' && dp->d_name[1]=='\0') || + (dp->d_name[0]=='.' && + dp->d_name[1]=='.' 
&& dp->d_name[2]=='\0')) + continue; + setpath(&Pathname[endofname], dp->d_name, 1); + Fname = &Pathname[endofname+1]; + if(descend(Fname, exlist, level+1)) { + if (fchdir(dir) < 0) + er("bad directory tree"); + } + } + Pathname[endofname] = '\0'; + getdb_free(db); + if (err) { + pr("cannot read dir %s: %s", Pathname, strerror(errno)); + status = 18; + } + close(dir); + visited[level].v_fd = -1; + return 1; +} + +/* + * Third part of descend, called if the limit of open file descriptors + * is exceeded (EMFILE). + */ +static int descend2(char *fname, struct anode *exlist, int level) +{ + pid_t pid; + int i; + + if (Cpio || Execplus) + trailer(exlist, 0); + fflush(stdout); + switch (pid = fork()) { + case 0: /* child: release the directory handles of the levels above, then scan this directory */ + for (i = 0; i < level-1; i++) { + if (visited[i].v_fd >= 0) { + getdb_free(visited[i].v_db); + close(visited[i].v_fd); + visited[i].v_fd = -1; + } + } + status |= 0; /* NOTE(review): OR-ing 0 leaves status unchanged; confirm whether a reset was intended */ + descend1(fname, exlist, level); + if (Cpio || Execplus) + trailer(exlist, 0); + exit(status); + /*NOTREACHED*/ + default: /* parent: wait for the child and merge its result into status */ + while (waitpid(pid, &i, 0) != pid); + if (i && WIFSIGNALED(i)) { /* child was killed by a signal: disable core dumps, then raise the same signal here */ + struct rlimit rl; + + rl.rlim_cur = rl.rlim_max = 0; + setrlimit(RLIMIT_CORE, &rl); + raise(WTERMSIG(i)); + pause(); + } + if (i) + status |= WEXITSTATUS(i); + return 1; + case -1: /* fork() failed */ + return 0; + } +} /* setpath: store fn (including its terminating NUL) at eos inside Pathname, preceded by a '/' when slash is nonzero; Pathname is enlarged through srealloc() in 14-byte steps whenever eos reaches pathend, and eos/pathend are rebased onto the new allocation. 
*/ +static void setpath(char *eos, const char *fn, int slash) +{ + static char *pathend; + char *opath; + + for (;;) { + if (eos >= pathend) { + pathend += 14; + opath = Pathname; + Pathname = srealloc(Pathname, pathend - Pathname); + eos += Pathname - opath; + pathend += Pathname - opath; + } + if (slash) { + *eos++ = '/'; + slash = 0; + } else + if ((*eos++ = *fn++) == '\0') + break; + } +} + /* pr: print "progname: ", the printf-style message s and a newline on stderr. */ +static void pr(const char *s, ...) +{ + va_list ap; + + fprintf(stderr, "%s: ", progname); + va_start(ap, s); + vfprintf(stderr, s, ap); + va_end(ap); + fprintf(stderr, "\n"); +} + /* er: print the message in the same format as pr(), then exit with status 1. */ +static void er(const char *s, ...) 
+{ + va_list ap; + + fprintf(stderr, "%s: ", progname); + va_start(ap, s); + vfprintf(stderr, s, ap); + va_end(ap); + fprintf(stderr, "\n"); + exit(1); +} + /* usage: report the synopsis through er(), which terminates the process. */ +static void usage(void) +{ + er("path-list predicate-list"); +} + /* srealloc: realloc() that never returns NULL; on failure it writes "no memory" to descriptor 2 and calls _exit(077). */ +static void *srealloc(void *op, size_t n) +{ + void *np; + + if ((np = realloc(op, n)) == NULL) { + write(2, "no memory\n", 10); + _exit(077); + } + return np; +} + /* mkcpio: install cpio() as the predicate of node p. With a non-empty b it also sets depth and Cpio, clears Print, creates file b and forks a "cpio -oB[c][L]" child whose stdin is a pipe fed through p->l.fp and whose stdout is b; 'c' is added when ascii is set, 'L' when statfn is stat. */ +static void mkcpio(struct anode *p, const char *b, int ascii) +{ + int fd, pd[2]; + char flags[20], *cp; + + p->F = cpio; + if (*b == '\0') + return; + depth = 1; + Print = 0; + Cpio = 1; + if (pipe(pd) < 0 || (p->l.fp = fdopen(pd[1], "w")) == NULL) + er("pipe() %s", strerror(errno)); + if ((fd = creat(b, 0666)) < 0) + er("cannot create %s", b); + switch (p->r.pid = fork()) { + case -1: + er("can't fork"); + /*NOTREACHED*/ + case 0: + dup2(pd[0], 0); + close(pd[0]); + close(pd[1]); + dup2(fd, 1); + close(fd); + cp = flags; + *cp++ = '-'; + *cp++ = 'o'; + *cp++ = 'B'; + if (ascii) + *cp++ = 'c'; + if (statfn == stat) + *cp++ = 'L'; + *cp = '\0'; + execlp("cpio", "cpio", flags, NULL); + pr("cannot exec cpio: %s", strerror(errno)); + _exit(0177); + /*NOTREACHED*/ + } + close(pd[0]); + close(fd); +} + /* trailer: walk the expression tree below p. cpio nodes get their stream flushed or, when termcpio is set, closed and the child reaped with its exit status or signal merged into status; exeq nodes that carry an aggregate (p->r.a) are run once more. Pathname is set to 0 for the duration of the walk and restored afterwards. 
*/ +static void +trailer(register struct anode *p, int termcpio) +{ + char *Opath = Pathname; + Pathname = 0; + if (p->F == or || p->F == and) { + trailer(p->l.L, termcpio); + trailer(p->r.R, termcpio); + } else if (p->F == not) + trailer(p->l.L, termcpio); + else if (p->F == cpio) { + if (termcpio) { + int s; + + fclose(p->l.fp); + while (waitpid(p->r.pid, &s, 0) != p->r.pid); + if (s) { + if (WIFEXITED(s)) + status |= WEXITSTATUS(s); + else if (WIFSIGNALED(s)) + status |= WTERMSIG(s) | 0200; + } + } else + fflush(p->l.fp); + } else if (p->F == exeq && p->r.a) + exeq(p); + Pathname = Opath; +} + /* mknewer: stat file b (exiting through er() on failure), store its st_mtime in p->l.t and install f as the predicate of node p. NOTE(review): st_mtime is stored whichever f is passed, and when b is empty the current contents of Statb are used without a fresh stat() - confirm both are intended. */ +static void +mknewer(struct anode *p, const char *b, int (*f)(struct anode *)) +{ + if (*b && stat(b, &Statb) < 0) + er("cannot access %s", b); + p->l.t = Statb.st_mtime; + p->F = f; +} + +/* + * 
Changes by Gunnar Ritter, Freiburg i. Br., Germany, September 2003. + */ +/* from Unix 7th Edition /usr/src/cmd/chmod.c */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#define USER 05700 /* user's bits */ +#define GROUP 02070 /* group's bits */ +#define OTHER 00007 /* other's bits */ +#define ALL 07777 /* all */ + +#define READ 00444 /* read permit */ +#define WRITE 00222 /* write permit */ +#define EXEC 00111 /* exec permit */ +#define SETID 06000 /* set[ug]id */ +#define STICKY 01000 /* sticky bit */ + +#ifndef S_ENFMT +#define S_ENFMT 02000 /* mandatory locking bit */ +#endif + +static mode_t absol(const char **); +static mode_t who(const char **, mode_t *); +static int what(const char **); +static mode_t where(const char **, mode_t, int *, int *, const mode_t); + +static mode_t +newmode(const char *ms, const mode_t pm) +{ + register mode_t o, m, b; + int lock, setsgid = 0, cleared = 0, copy = 0; + mode_t nm, om, mm; + const char *mo = ms; + + nm = om = pm; + m = absol(&ms); + if (!*ms) { + nm = m; + goto out; + } + if ((lock = (nm&S_IFMT) != S_IFDIR && (nm&(S_ENFMT|S_IXGRP)) == S_ENFMT) + == 01) + nm &= ~(mode_t)S_ENFMT; + do { + m = who(&ms, &mm); + while (o = what(&ms)) { + b = where(&ms, nm, &lock, &copy, pm); + switch (o) { + case '+': + nm |= b & m & ~mm; + if (b & S_ISGID) + setsgid = 1; + if (lock & 04) + lock |= 02; + break; + case '-': + nm &= ~(b & m & ~mm); + if (b & S_ISGID) + setsgid = 1; + if (lock & 04) + lock = 0; + break; + case '=': + nm &= ~m; + nm |= b & m & ~mm; + lock &= ~01; + if (lock & 04) + lock |= 02; + om = 0; + if (copy == 0) + cleared = 1; + break; + } + lock &= ~04; + } + } while (*ms++ == ','); + if (*--ms) + er("bad permissions: %s", mo); +out: if (pm & S_IFDIR) { + if ((pm & S_ISGID) && setsgid == 0) + nm |= S_ISGID; + else if ((nm & S_ISGID) && setsgid == 0) + nm &= ~(mode_t)S_ISGID; + } + return(nm); +} + +static mode_t +absol(const char **ms) +{ + register int c, i; + + i = 0; + while ((c = *(*ms)++) >= '0' && c <= '7') + i = (i << 3) + (c - '0'); + (*ms)--; + return(i); +} + +static mode_t +who(const char **ms, mode_t *mp) +{ + register int m; + + m = 0; + *mp = 0; + for (;;) 
switch (*(*ms)++) { + case 'u': + m |= USER; + continue; + case 'g': + m |= GROUP; + continue; + case 'o': + m |= OTHER; + continue; + case 'a': + m |= ALL; + continue; + default: + (*ms)--; + if (m == 0) { + m = ALL; + *mp = um; + } + return m; + } +} + +static int +what(const char **ms) +{ + switch (**ms) { + case '+': + case '-': + case '=': + return *(*ms)++; + } + return(0); +} + +static mode_t +where(const char **ms, mode_t om, int *lock, int *copy, const mode_t pm) +{ + register mode_t m; + + m = 0; + *copy = 0; + switch (**ms) { + case 'u': + m = (om & USER) >> 6; + goto dup; + case 'g': + m = (om & GROUP) >> 3; + goto dup; + case 'o': + m = (om & OTHER); + dup: + *copy = 1; + m &= (READ|WRITE|EXEC); + m |= (m << 3) | (m << 6); + ++(*ms); + return m; + } + for (;;) switch (*(*ms)++) { + case 'r': + m |= READ; + continue; + case 'w': + m |= WRITE; + continue; + case 'x': + m |= EXEC; + continue; + case 'X': + if ((pm&S_IFMT) == S_IFDIR || (pm & EXEC)) + m |= EXEC; + continue; + case 'l': + if ((pm&S_IFMT) != S_IFDIR) + *lock |= 04; + continue; + case 's': + m |= SETID; + continue; + case 't': + m |= STICKY; + continue; + default: + (*ms)--; + return m; + } +} diff --git a/find/mkfile b/find/mkfile @@ -0,0 +1,8 @@ +BIN = find +OBJ = find.o +LOCAL_CFLAGS = -DGETDIR -DSU3 +INSTALL_BIN = find +INSTALL_MAN1 = find.1 +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/fmt/fmt.1 b/fmt/fmt.1 @@ -0,0 +1,115 @@ +.\" Copyright (c) 1980 Regents of the University of California. +.\" All rights reserved. The Berkeley software License Agreement +.\" specifies the terms and conditions for redistribution. +.\" +.\" Copyright (c) 1980, 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" Copyright for changes (c) 2003 +.\" Gunnar Ritter. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" This product includes software developed by Gunnar Ritter +.\" and his contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS '\fIAS IS\fR' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" from 4.3BSD fmt.1 6.1 (Berkeley) 4/29/85 +.\" +.\" from FMT 1 "April 29, 1985" +.TH FMT 1 "5/6/03" "Heirloom Toolchest" "User Commands" +.SH NAME +fmt \- simple text formatter +.SH SYNOPSIS +\fBfmt\fR [\fB\-c\fR] [\fB\-s\fR] [\fB\-w\ \fIwidth\fR | \fB\-\fIwidth\fR] +[\fIfile\ ...\fR] +.SH DESCRIPTION +.I Fmt +is a simple text formatter which reads the concatenation of input +files (or standard input if none are given) and produces on +standard output a version of its input with lines as close to +72 characters long as possible. The spacing at the beginning +of the input lines is preserved in the output, as are blank lines +and interword spacing. +.PP +.I Fmt +is meant to format mail messages prior to sending, but may also be useful +for other simple tasks. +For instance, +within visual mode of the +.I ex +editor (e.g. +.IR vi ) +the command +.sp + !}fmt +.sp +will reformat a paragraph, +evening the lines. +.PP +The following options can be used +to alter the behavior of +.IR fmt : +.TP +.B \-c +Select crown margin mode +(for tagged paragraphs). +Paragraphs are separated by empty lines. +The indenting of the first and second line +of each paragraph is preserved, +and following lines are indented +like the second line. +.TP +.B \-s +Split lines, +but do not join lines +(i.\|e. preserve any newline character +found in the input file). +.TP +\fB\-w\fI\ width\fR +Set the length of generated output lines to +.I width +characters. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters, +the width of characters, +and the set of word-separating characters. +.SH "SEE ALSO" +nroff(1), +mailx(1), +vi(1) +.SH NOTES +The program was designed to be simple and fast \- for more complex +operations, the standard text processors are likely to be more appropriate. +.PP +The original version of this program was written by +Kurt Shoens. 
diff --git a/fmt/fmt.c b/fmt/fmt.c @@ -0,0 +1,678 @@ +/* + * This code contains changes by + * Gunnar Ritter, Freiburg i. Br., Germany, April 2003. All rights reserved. + * + * Conditions 1, 2, and 4 and the no-warranty notice below apply + * to these changes. + * + * + * Copyright (c) 1991 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Copyright (c) 1980 Regents of the University of California. + * All rights reserved. The Berkeley software License Agreement + * specifies the terms and conditions for redistribution. + */ + +/* from 4.3BSD fmt.c 5.2 (Berkeley) 6/21/85 */ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)fmt.sl 1.9 (gritter) 5/29/05"; + +#include <stdio.h> +#include <string.h> +#include <wchar.h> +#include <wctype.h> +#include <ctype.h> +#include <stdlib.h> +#include <libgen.h> +#include <locale.h> + +#ifdef __GLIBC__ +#ifdef _IO_putc_unlocked +#undef putchar +#define putchar(c) _IO_putc_unlocked(c, stdout) +#endif +#endif + +#include <iblok.h> +#include <asciitype.h> + +/* + * fmt -- format the concatenation of input files or standard input + * onto standard output. Designed for use with Mail ~| + * + * Syntax: fmt [ -width ] [ name ... 
] + * Author: Kurt Shoens (UCB) 12/7/78 + */ + +static int pfx; /* Current leading blank count */ +static long long lineno; /* Current input line */ +static int mark; /* we saw a head line */ +static long width = 72; /* Width that we will not exceed */ +static int cflag; /* crown margin mode */ +static int sflag; /* split only */ +static const char *progname; /* argv0 */ +static int mb_cur_max; + + +static const char *headnames[] = {"To", "Subject", "Cc", "Bcc", "bcc", 0}; + +static void setwidth(const char *); +static void usage(void); +static void fmt(struct iblok *); +static void prefix(const wchar_t *); +static void split(const wchar_t *); +static void setout(void); +static void pack(const wchar_t *); +static void oflush(void); +static void tabulate(wchar_t *); +static void leadin(void); +static int chkhead(const char *, const wchar_t *); +static int fromline(const wchar_t *); +static size_t colwidth(const wchar_t *); +static size_t colwidthn(const wchar_t *, const wchar_t *); +static void growibuf(void); +static void growobuf(void); + +/* + * Drive the whole formatter by managing input files. Also, + * cause initialization of the output stuff and flush it out + * at the end. 
+ */ + +int +main(int argc, char **argv) +{ + register struct iblok *fi; + register int errs = 0, i; + + progname = basename(argv[0]); + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + setout(); + lineno = 1; + for (i = 1; i < argc && argv[i][0] == '-' && argv[i][1]; i++) { + if (argv[i][1] == '-' && argv[i][2] == '\0') { + i++; + break; + } + nopt: switch (argv[i][1]) { + case '\0': + continue; + case 'c': + cflag = 1; + break; + case 's': + sflag = 1; + break; + case 'w': + if (argv[i][2]) { + setwidth(&argv[i][2]); + continue; + } else if (i < argc) { + setwidth(argv[++i]); + continue; + } else + setwidth(NULL); + break; + case '0': + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + setwidth(&argv[i][1]); + continue; + default: + usage(); + exit(2); + } + argv[i]++; + goto nopt; + } + if (i < argc) { + while (i < argc) { + if ((fi = ib_open(argv[i], 0)) == NULL) { + perror(argv[i]); + errs |= 1; + } else + fmt(fi); + i++; + } + } else { + if ((fi = ib_alloc(0, 0)) == NULL) { + perror("stdin"); + errs |= 1; + } else + fmt(fi); + } + oflush(); + exit(errs); +} + +static void +setwidth(const char *s) +{ + char *x; + + if (s == NULL || (width = strtol(s, &x, 10), + width <= 0 || + *x != '\0' || *s == '+' || *s == '-')) { + usage(); + fprintf(stderr, " Non-numeric character found " + "in width specification\n"); + exit(2); + } +} + +static void +usage(void) +{ + fprintf(stderr, + "usage: %s [-c] [-s] [-w width | -width] [inputfile...]\n", + progname); +} + +static char * +getvalid(struct iblok *ip, wint_t *wp, int *mp) +{ + char *cp; + + do + cp = ib_getw(ip, wp, mp); + while (cp && *wp == WEOF); + return cp; +} + +#define get(mp, fi, c, m, b) (mp = mb_cur_max > 1 ? getvalid(fi, &c, &m) : \ + (b = c = ib_get(fi), m = 1, c != (wint_t)EOF ? 
&b : 0)) + +static int ibufsize; +static wchar_t *linebuf; +static wchar_t *canonb; + +/* + * Read up characters from the passed input file, forming lines, + * doing ^H processing, expanding tabs, stripping trailing blanks, + * and sending each line down for analysis. + */ +static void +fmt(struct iblok *fi) +{ + register int p, p2; + wint_t c; + register long col; + char *mp; + int m; + char b; + + get(mp, fi, c, m, b); + while (c != (wint_t)EOF) { + + /* + * Collect a line, doing ^H processing. + * Leave tabs for now. + */ + + p = 0; + while (c != '\n' && c != (wint_t)EOF) { + if (c == '\b') { + get(mp, fi, c, m, b); + continue; + } + if (!(mb_cur_max > 1 ? iswprint(c) : isprint(c)) && + c != '\t') { + get(mp, fi, c, m, b); + continue; + } + if (p >= ibufsize) + growibuf(); + linebuf[p++] = c; + get(mp, fi, c, m, b); + } + if (p >= ibufsize) + growibuf(); + linebuf[p] = '\0'; + + /* + * Toss anything remaining on the input line. + */ + + while (c != '\n' && c != (wint_t)EOF) + get(mp, fi, c, m, b); + + /* + * Expand tabs on the way to canonb. + */ + + col = 0; + p = p2 = 0; + while (c = linebuf[p++]) { + if (c != '\t') { + if (mb_cur_max > 1) + col += wcwidth(c); + else + col++; + if (p2 >= ibufsize) + growibuf(); + canonb[p2++] = c; + continue; + } + do { + if (p2 >= ibufsize) + growibuf(); + canonb[p2++] = ' '; + col++; + } while ((col & 07) != 0); + } + + /* + * Swipe trailing blanks from the line. + */ + + for (p2--; p2 >= 0 && canonb[p2] == ' '; p2--) + ; + if (p2 >= ibufsize-1) + growibuf(); + canonb[++p2] = '\0'; + prefix(canonb); + if (c != (wint_t)EOF) + get(mp, fi, c, m, b); + } +} + +/* + * Take a line devoid of tabs and other garbage and determine its + * blank prefix. If the indent changes, call for a linebreak. + * If the input line is blank, echo the blank line on the output. + * Finally, if the line minus the prefix is a mail header, try to keep + * it on a line by itself. 
+ */ + +static void +prefix(const wchar_t *line) +{ + register const wchar_t *cp; + register const char **hp; + register long np; + register int h; + static int nlpp; /* number of lines on current paragraph */ + + if (wcslen(line) == 0) { + nlpp = 0; + oflush(); + putchar('\n'); + mark = 0; + return; + } + for (cp = line; *cp == ' '; cp++) + ; + np = cp - line; + + /* + * The following horrible expression attempts to avoid linebreaks + * when the indent changes due to a paragraph. + */ + + if (!cflag && np != pfx && (np > pfx || abs(pfx-np) > 8)) + oflush(); + if (h = fromline(cp)) + oflush(), mark = 1; + else if (mark) { + for (hp = &headnames[0]; *hp != NULL; hp++) + if (chkhead(*hp, cp)) { + h = 1; + oflush(); + break; + } + } + if (!h && (h = (*cp == '.' || sflag))) + oflush(); + if (!cflag || nlpp < 2) + pfx = np; + split(cp); + if (h) + oflush(); + nlpp++; + lineno++; +} + +/* + * Split up the passed line into output "words" which are + * maximal strings of non-blanks with the blank separation + * attached at the end. Pass these words along to the output + * line packer. + */ + +static wchar_t *word; + +static void +split(const wchar_t *line) +{ + register const wchar_t *cp; + register wchar_t *cp2; + + cp = line; + while (*cp) { + cp2 = word; + + /* + * Collect a 'word,' allowing it to contain escaped + * white space. + */ + + while (*cp && *cp != ' ') { + if (*cp == '\\' && iswspace(cp[1])) + *cp2++ = *cp++; + *cp2++ = *cp++; + } + + /* + * Guarantee a space at end of line. + * Two spaces after end of sentence punctuation. + */ + + if (*cp == '\0') { + *cp2++ = ' '; + if (strchr(".:!?", cp[-1])) + *cp2++ = ' '; + } + while (*cp == ' ') + *cp2++ = *cp++; + *cp2 = '\0'; + pack(word); + } +} + +/* + * Output section. + * Build up line images from the words passed in. Prefix + * each line with correct number of blanks. The buffer "outbuf" + * contains the current partial line image, including prefixed blanks. 
+ * "outp" points to the next available space therein. When outp is NOSTR, + * there ain't nothing in there yet. At the bottom of this whole mess, + * leading tabs are reinserted. + */ + +static int obufsize; +static wchar_t *outbuf; /* Sandbagged output line image */ +static wchar_t *outp; /* Pointer in above */ + +/* + * Initialize the output section. + */ + +static void +setout(void) +{ + outp = NULL; +} + +/* + * Pack a word onto the output line. If this is the beginning of + * the line, push on the appropriately-sized string of blanks first. + * If the word won't fit on the current line, flush and begin a new + * line. If the word is too long to fit all by itself on a line, + * just give it its own and hope for the best. + */ + +static void +pack(const wchar_t *word) +{ + register const wchar_t *cp; + register long s, t; + + if (outp == NULL) + leadin(); + t = colwidth(word); + s = colwidthn(outbuf, outp); + if (t+s <= width) { + + /* + * In like flint! + */ + + for (cp = word; *cp; cp++) { + if (outp >= &outbuf[obufsize]) + growobuf(); + *outp++ = *cp; + } + return; + } + if (s > pfx) { + oflush(); + leadin(); + } + for (cp = word; *cp; cp++) { + if (outp >= &outbuf[obufsize]) + growobuf(); + *outp++ = *cp; + } +} + +/* + * If there is anything on the current output line, send it on + * its way. Set outp to NULL to indicate the absence of the current + * line prefix. + */ + +static void +oflush(void) +{ + if (outp == NULL) + return; + if (outp >= &outbuf[obufsize]) + growobuf(); + *outp = '\0'; + tabulate(outbuf); + outp = NULL; +} + +/* + * Take the passed line buffer, insert leading tabs where possible, and + * output on standard output (finally). + */ + +static void +tabulate(wchar_t *line) +{ + register wchar_t *cp; + register int b, t; + + /* + * Toss trailing blanks in the output line. + */ + + cp = line + wcslen(line) - 1; + while (cp >= line && *cp == ' ') + cp--; + *++cp = '\0'; + + /* + * Count the leading blank space and tabulate. 
+ */ + + for (cp = line; *cp == ' '; cp++) + ; + b = cp-line; + t = b >> 3; + b &= 07; + if (t > 0) + do + putchar('\t'); + while (--t); + if (b > 0) + do + putchar(' '); + while (--b); + while (*cp) { + if (mb_cur_max > 1 && *cp & ~(wchar_t)0177) { + char mb[MB_LEN_MAX]; + int i, n; + n = wctomb(mb, *cp); + for (i = 0; i < n; i++) + putchar(mb[i]); + } else + putchar(*cp); + cp++; + } + putchar('\n'); +} + +/* + * Initialize the output line with the appropriate number of + * leading blanks. + */ + +static void +leadin(void) +{ + register long b; + + if (outbuf == 0) + growobuf(); + for (b = 0; b < pfx; b++) { + if (b >= obufsize) + growobuf(); + outbuf[b] = ' '; + } + outp = &outbuf[b]; +} + +/* + * Is s2 the mail header field name s1? + */ + +static int +chkhead(register const char *s1, register const wchar_t *s2) +{ + + while (*s1 && *s1++ == *s2++); + if (*s1 != '\0') + return 0; + return 1; +} + +/* + * Sloppy recognition of Unix From_ lines (not according to the POSIX.2 + * mailx specification, but oriented on actual Unix tradition). 
We match + * the ERE + * ^From .* [A-Z][a-z][a-z] [A-Z][a-z][a-z] \ + * [0-9 ]?[0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] + */ + +static int +fromline(const wchar_t *cp) +{ + if (cp[0] != 'F' || cp[1] != 'r' || cp[2] != 'o' || cp[3] != 'm' || + cp[4] != ' ') + return 0; + cp += 5; + while (*cp && *cp != ' ') + cp++; + if (*cp++ != ' ') + return 0; + if (!upperchar(cp[0]) || !lowerchar(cp[1]) || !lowerchar(cp[2]) || + cp[3] != ' ' || + !upperchar(cp[4]) || !lowerchar(cp[5]) || !lowerchar(cp[6]) || + cp[7] != ' ') + return 0; + cp += 8; + if (digitchar(*cp) || *cp == ' ') + cp++; + if (!digitchar(cp[0]) || cp[1] != ' '|| + !digitchar(cp[2]) || !digitchar(cp[3]) || + cp[4] != ':' || + !digitchar(cp[5]) || !digitchar(cp[6]) || + cp[7] != ':' || + !digitchar(cp[8]) || !digitchar(cp[9])) + return 0; + return 1; +} + +static size_t +colwidth(const wchar_t *cp) +{ + size_t n = 0; + + if (mb_cur_max > 1) + while (*cp) + n += wcwidth(*cp++); + else + n = wcslen(cp); + return n; +} + +static size_t +colwidthn(const wchar_t *bot, const wchar_t *top) +{ + size_t n = 0; + + if (mb_cur_max > 1) + while (bot < top) + n += wcwidth(*bot++); + else + n = top - bot; + return n; +} + +static void +growibuf(void) +{ + ibufsize += 128; + if ((word = realloc(word, ibufsize * sizeof *word)) == 0 || + (linebuf = realloc(linebuf, ibufsize * sizeof *linebuf)) == 0 || + (canonb = realloc(canonb, ibufsize * sizeof *canonb)) == 0) { + fprintf(stderr, "%s: input line too long\n", progname); + exit(1); + } +} + +static void +growobuf(void) +{ + int diff = 0; + + if (outp != NULL) + diff = outp - outbuf; + obufsize += 128; + if ((outbuf = realloc(outbuf, obufsize * sizeof *outbuf)) == 0) { + fprintf(stderr, "%s: output line too long\n", progname); + exit(1); + } + if (outp != NULL) + outp = &outbuf[diff]; +} diff --git a/fmt/mkfile b/fmt/mkfile @@ -0,0 +1,7 @@ +BIN = fmt +OBJ = fmt.o +INSTALL_BIN = fmt +INSTALL_MAN1 = fmt.1 +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/grep/ac.c b/grep/ac.c @@ 
-0,0 +1,578 @@ +/* + * Aho-Corasick algorithm derived from Unix 32V /usr/src/cmd/fgrep.c, + * additionally incorporating the fix from the v7 addenda tape. + * + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, September 2002. + */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)fgrep.sl 2.10 (gritter) 5/29/05"; + +#include <sys/types.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <limits.h> +#include "grep.h" +#include "alloc.h" + +#include <mbtowi.h> + +#define MAXSIZ 256 +#define QSIZE 128 + +struct words { + struct words *nst; + struct words *link; + struct words *fail; + int inp; + char out; +}; + +static struct words *w, *wcur; +static struct words *smax; +static struct words *q; + +static void ac_build(void); +static int ac_match(const char *, size_t); +static int ac_matchw(const char *, size_t); +static int ac_range(struct iblok *, char *); +static int ac_rangew(struct iblok *, char *); +static void cgotofn(void); +static void check(int, int); +static void woverflo(void); +static void qoverflo(struct words ***queue, int *qsize); +static void cfail(void); +static int a0_match(const char *, size_t); +static int a1_match(const char *, size_t); + +void +ac_select(void) +{ + build = ac_build; + match = mbcode ? 
ac_matchw : ac_match; + matchflags &= ~MF_NULTERM; + matchflags |= MF_LOCONV; +} + +static void +ac_build(void) +{ + struct expr *e; + + if (e0->e_flg & E_NULL) { + match = a0_match; + return; + } + for (e = e0; e; e = e->e_nxt) { + if (e->e_len == 0 && !xflag) { + match = a1_match; + return; + } + } + cgotofn(); + cfail(); + if (!iflag) + range = mbcode ? ac_rangew : ac_range; +} + +static int +ac_match(const char *line, size_t sz) +{ + register const char *p; + register int z; + register struct words *c; + int failed; + + p = line; + failed = 0; + c = w; + if (p == &line[sz]) + z = '\n'; + else + z = *p & 0377; + for (;;) { + nstate: + if (c->inp == z) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto nstate; + } + else { + c = c->fail; + failed = 1; + if (c==0) { + c = w; + istate: + if (c->inp == z) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto istate; + } + } + else goto nstate; + } + if (c->out) { + if (xflag) { + if (failed || p < &line[sz]) + return 0; + } + return 1; + } + if (++p >= &line[sz]) { + if (z == '\n') + return 0; + else + z = '\n'; + } else + z = *p & 0377; + } +} + +static int +ac_range(struct iblok *ip, char *last) +{ + register char *p; + register struct words *c; + int failed; + + p = ip->ib_cur; + lineno++; + failed = 0; + c = w; + for (;;) { + nstate: + if (c->inp == (*p & 0377)) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto nstate; + } + else { + c = c->fail; + failed = 1; + if (c==0) { + c = w; + istate: + if (c->inp == (*p & 0377)) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto istate; + } + } + else goto nstate; + } + if (c->out) { + if (xflag) { + register char *ep = p; + while (*ep != '\n') + ep++; + if ((failed || ep > p) && vflag == 0) { + ip->ib_cur = &ep[1]; + goto nogood; + } + } + if (vflag == 0) { + succeed: outline(ip, last, p - ip->ib_cur); + if (qflag || lflag) + return 1; + } else { + ip->ib_cur = p; + while (*ip->ib_cur++ != '\n'); + } 
+ nogood: if ((p = ip->ib_cur) > last) + return 0; + lineno++; + c = w; + failed = 0; + continue; + } + if (*p++ == '\n') { + if (vflag) { + p--; + goto succeed; + } + if ((ip->ib_cur = p) > last) + return 0; + lineno++; + c = w; + failed = 0; + } + } +} + +static int +ac_matchw(const char *line, size_t sz) +{ + register const char *p; + wint_t z; + register struct words *c; + int failed, n = 0; + + p = line; + failed = 0; + c = w; + if (p == &line[sz]) + z = '\n'; + else { + if (*p & 0200) { + if ((n = mbtowi(&z, p, &line[sz] - p)) < 0) { + n = 1; + z = WEOF; + } + } else { + z = *p; + n = 1; + } + } + for (;;) { + nstate: + if (c->inp == z) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto nstate; + } + else { + c = c->fail; + failed = 1; + if (c==0) { + c = w; + istate: + if (c->inp == z) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto istate; + } + } + else goto nstate; + } + if (c->out) { + if (xflag) { + if (failed || p < &line[sz]) + return 0; + } + return 1; + } + p += n; + if (p >= &line[sz]) { + if (z == '\n') + return 0; + else + z = '\n'; + } else { + if (*p & 0200) { + if ((n = mbtowi(&z, p, &line[sz] - p)) < 0) { + n = 1; + z = WEOF; + } + } else { + z = *p; + n = 1; + } + } + } +} + +static int +ac_rangew(struct iblok *ip, char *last) +{ + register char *p; + wint_t z; + register struct words *c; + int failed, n = 0; + + p = ip->ib_cur; + lineno++; + failed = 0; + c = w; + for (;;) { + nstate: + if (*p & 0200) { + if ((n = mbtowi(&z, p, last + 1 - p)) < 0) { + n = 1; + z = WEOF; + } + } else { + z = *p; + n = 1; + } + if (c->inp == z) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto nstate; + } + else { + c = c->fail; + failed = 1; + if (c==0) { + c = w; + istate: + if (c->inp == z) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto istate; + } + } + else goto nstate; + } + if (c->out) { + if (xflag) { + register char *ep = p; + while (*ep != '\n') + ep++; + if ((failed || 
ep > p) && vflag == 0) { + ip->ib_cur = &ep[1]; + goto nogood; + } + } + if (vflag == 0) { + succeed: outline(ip, last, p - ip->ib_cur); + if (qflag || lflag) + return 1; + } else { + ip->ib_cur = p; + while (*ip->ib_cur++ != '\n'); + } + nogood: if ((p = ip->ib_cur) > last) + return 0; + lineno++; + c = w; + failed = 0; + continue; + } + p += n; + if (p[-n] == '\n') { + if (vflag) { + p--; + goto succeed; + } + if ((ip->ib_cur = p) > last) + return 0; + lineno++; + c = w; + failed = 0; + } + } +} + +static void +cgotofn(void) +{ + register int c; + register struct words *s; + + woverflo(); + s = smax = w = wcur; +nword: for(;;) { + c = nextch(); + if (c==EOF) + return; + if (c == '\n') { + if (xflag) { + for(;;) { + if (s->inp == c) { + s = s->nst; + break; + } + if (s->inp == 0) goto nenter; + if (s->link == 0) { + if (++smax >= &wcur[MAXSIZ]) + woverflo(); + s->link = smax; + s = smax; + goto nenter; + } + s = s->link; + } + } + s->out = 1; + s = w; + } else { + loop: if (s->inp == c) { + s = s->nst; + continue; + } + if (s->inp == 0) goto enter; + if (s->link == 0) { + if (++smax >= &wcur[MAXSIZ]) + woverflo(); + s->link = smax; + s = smax; + goto enter; + } + s = s->link; + goto loop; + } + } + + enter: + do { + s->inp = c; + if (++smax >= &wcur[MAXSIZ]) + woverflo(); + s->nst = smax; + s = smax; + } while ((c = nextch()) != '\n' && c!=EOF); + if (xflag) { + nenter: s->inp = '\n'; + if (++smax >= &wcur[MAXSIZ]) + woverflo(); + s->nst = smax; + } + smax->out = 1; + s = w; + if (c != EOF) + goto nword; +} + +static void +check(int val, int incr) +{ + if ((unsigned)(val + incr) >= INT_MAX) { + fprintf(stderr, "%s: wordlist too large\n", progname); + exit(2); + } +} + +static void +woverflo(void) +{ + wcur = smax = scalloc(MAXSIZ, sizeof *smax); +} + +static void +qoverflo(struct words ***queue, int *qsize) +{ + check(*qsize, QSIZE); + *queue = srealloc(*queue, (*qsize += QSIZE) * sizeof **queue); +} + +static void +cfail(void) +{ + struct words **queue = NULL; + 
int front, rear; + int qsize = 0; + struct words *state; + int bstart; + register char c; + register struct words *s; + qoverflo(&queue, &qsize); + s = w; + front = rear = 0; +init: if ((s->inp) != 0) { + queue[rear++] = s->nst; + if (rear >= qsize - 1) + qoverflo(&queue, &qsize); + } + if ((s = s->link) != 0) { + goto init; + } + + while (rear!=front) { + s = queue[front]; + if (front == qsize-1) + qoverflo(&queue, &qsize); + front++; + cloop: if ((c = s->inp) != 0) { + bstart = 0; + q = s->nst; + queue[rear] = q; + if (front < rear) { + if (rear >= qsize-1) + qoverflo(&queue, &qsize); + rear++; + } else + if (++rear == front) + qoverflo(&queue, &qsize); + state = s->fail; + floop: if (state == 0) { + state = w; + bstart = 1; + } + if (state->inp == c) { + qloop: q->fail = state->nst; + if ((state->nst)->out == 1) + q->out = 1; + if ((q = q->link) != 0) goto qloop; + } + else if ((state = state->link) != 0) + goto floop; + else if (bstart == 0) { + state = 0; + goto floop; + } + } + if ((s = s->link) != 0) + goto cloop; + } + free(queue); +} + +/*ARGSUSED*/ +static int +a0_match(const char *str, size_t sz) +{ + return 0; +} + +/*ARGSUSED*/ +static int +a1_match(const char *str, size_t sz) +{ + return 1; +} diff --git a/grep/alloc.c b/grep/alloc.c @@ -0,0 +1,81 @@ +/* + * grep - search a file for a pattern + * + * Gunnar Ritter, Freiburg i. Br., Germany, April 2001. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* Sccsid @(#)alloc.c 1.3 (gritter) 4/17/03> */ + +/* + * Memory allocation routines. + */ + +#include <stdlib.h> +#include <unistd.h> +#include "alloc.h" + +/* + * Memory allocation with check. + */ +void * +smalloc(size_t nbytes) +{ + void *p; + + if ((p = (void *)malloc(nbytes)) == NULL) { + write(2, "Out of memory\n", 14); + exit(077); + } + return p; +} + +/* + * Memory reallocation with check. + */ +void * +srealloc(void *ptr, size_t nbytes) +{ + void *cp; + + if ((cp = (void *)realloc(ptr, nbytes)) == NULL) { + write(2, "Out of memory\n", 14); + exit(077); + } + return cp; +} + +/* + * Zero-filled allocation with check. + */ +void * +scalloc(size_t nelem, size_t elsize) +{ + void *cp; + + if ((cp = calloc(nelem, elsize)) == NULL) { + write(2, "Out of memory\n", 14); + exit(077); + } + return cp; +} diff --git a/grep/alloc.h b/grep/alloc.h @@ -0,0 +1,34 @@ +/* + * grep - search a file for a pattern + * + * Gunnar Ritter, Freiburg i. Br., Germany, April 2001. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* Sccsid @(#)alloc.h 1.3 (gritter) 4/17/03> */ + +#include <sys/types.h> + +extern void *smalloc(size_t); +extern void *srealloc(void *, size_t); +extern void *scalloc(size_t, size_t); diff --git a/grep/config.h b/grep/config.h @@ -0,0 +1,4 @@ +/* Auto-generated by make. Do not edit! */ +#include <wchar.h> +#include <wctype.h> +#define LONGLONG diff --git a/grep/egrep.1 b/grep/egrep.1 @@ -0,0 +1,388 @@ +'\" t +.\" Sccsid @(#)egrep.1 1.42 (gritter) 8/14/05 +.\" Parts taken from grep(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. 
+.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH EGREP 1 "8/14/05" "Heirloom Toolchest" "User Commands" +.SH NAME +egrep \- search a file for a pattern using full regular expressions +.SH SYNOPSIS +.HP +.ad l +.nh +\fB/usr/5bin/egrep\fR [\fB\-e\fI\ pattern_list\fR\ ...] +[\fB\-f\fI\ pattern_file\fR] [\fB\-bchilnrRvz\fR] +[\fIpattern_list\fR] [\fIfile\fR\ ...] +.HP +.ad l +.PD 0 +\fB/usr/5bin/posix/egrep\fR \fB\-e\fI\ pattern_list\fR\ ... +[\fB\-f\fI\ pattern_file\fR] [\fB\-c\fR|\fB\-l\fR|\fB\-q\fR] +[\fB\-bhinrRsvxz\fR] [\fIfile\fR\ ...] +.HP +.ad l +\fB/usr/5bin/posix/egrep\fR \fB\-f\fI\ pattern_file\fR +[\fB\-e\fI\ pattern_list\fR\ ...] [\fB\-c\fR|\fB\-l\fR|\fB\-q\fR] +[\fB\-bhinrRsvxz\fR] [\fIfile\fR\ ...] +.HP +.ad l +\fB/usr/5bin/posix/egrep\fR [\fB\-c\fR|\fB\-l\fR|\fB\-q\fR] [\fB\-bhinsrRvxz\fR] +\fIpattern_list\fR [\fIfile\fR\ ...] +.br +.PD +.ad b +.hy 1 +.SH DESCRIPTION +The +.B egrep +command searches the lines of the specified files +(or of standard input) +for occurrences of +.I pattern. +The default behavior is to print each matching line to standard output. 
+.PP +The +.B /usr/5bin/egrep +command accepts full regular expressions; +it uses a deterministic algorithm with moderate space requirements. +.PP +The +.B /usr/5bin/posix/egrep +command accepts extended regular expressions. +It uses a deterministic algorithm with moderate space requirements +unless the expression includes multi-character collating elements, +which cause the use of a nondeterministic algorithm. +.PP +.B /usr/5bin/s42/egrep +and +.B /usr/5bin/posix2001/egrep +are identical to +.BR /usr/5bin/posix/egrep . +.SS "Full Regular Expressions" +.PP +In the following description `character' excludes +newline: +.IP 1. +A \fB\e\fR followed by a single character +matches that character. +.IP 2. +The character \fB^\fR +(\fB$\fR) matches the beginning (end) of a line +as an \fIanchor\fR. +.IP 3. +A +.B .\& +matches any character. +.IP 4. +A single character not otherwise endowed with special +meaning matches that character. +.IP 5. +A string enclosed in brackets \fB[\|]\fR +forms a \fIbracket expression\fR that +matches any single character from the string. +Ranges of ASCII character codes may be abbreviated +as in `\fIa\fB\-\fIz0\fB\-\fI9\fR'. +A ] +may occur only as the first character of the string. +A literal \- must be placed where it can't be +mistaken as a range indicator. +.IP 6. +A regular expression followed by \fB*\fR (\fB+\fR, \fB?\fR) matches a sequence +of 0 or more (1 or more, 0 or 1) +matches of the regular expression. +.IP 7. +Two regular expressions concatenated +match a match of the first followed by a match of +the second. +.IP 8. +Two regular expressions separated by \fB|\fR or newline +match either a match for the first or a match for the +second (\fIalternation\fR). +.IP 9. +A regular expression enclosed in parentheses \fB(\|)\fR +matches a match for the regular expression (\fIgrouping\fR). +.LP +The order of precedence of operators +is [\|] then (\|) then +*+? then concatenation then | and newline. 
+.SS "Extended Regular Expressions" +Extended Regular Expressions add the following features +to Full Regular Expressions: +.IP 10. +A regular expression +followed by \fB{\fIm\fB,\fIn\fB}\fR +forms an \fIinterval expression\fR that +matches a sequence of \fIm\fR through \fIn\fR matches, inclusive, +of the regular expression. +The values of \fIm\fR and \fIn\fR must be non-negative +and smaller than 255. +The form \fB{\fIm\fB}\fR matches exactly \fIm\fR occurrences, +\fB{\fIm\fB,}\fR matches at least \fIm\fR occurrences. +.IP 11. +In bracket expressions as described in 5., +the following character sequences are considered special: +.IP +Character class expressions of the form +\fB[:\fIclass\fB:]\fR. +In the C LC_CTYPE locale, +the classes +.sp +.TS +l l l l. +[:alnum:] [:cntrl:] [:lower:] [:space:] +[:alpha:] [:digit:] [:print:] [:upper:] +[:blank:] [:graph:] [:punct:] [:xdigit:] +.TE +.sp +are recognized; +further locale-specific classes may be available. +A character class expression matches any character +that belongs to the given class in the current LC_CTYPE locale. +.IP +Collating symbol expressions of the form +\fB[.\fIc\fB.]\fR, +where \fIc\fR is a collating symbol +in the current LC_COLLATE locale. +A collating symbol expression +matches the specified collating symbol. +.IP +Equivalence class expressions of the form +\fB[=\fIc\fB=]\fR, +where \fIc\fR is a collating symbol +in the current LC_COLLATE locale. +An equivalence class expression +matches any character that has the same collating weight +as \fIc\fR. +.LP +The order of precedence of operators +is [=\|=] [:\|:] [.\|.] +then [\|] +then (\|) +then *+? {m,n} +then concatenation +then ^ $ +then | and newline. +.PP +Care should be taken when using the characters +$ * [ ^ | ? \' " ( ) and \e in the expression +as they are also meaningful to the Shell. +It is safest to enclose the entire expression +argument in single quotes \' \'. 
+.PP +Both +.B /usr/5bin/egrep +and +.B /usr/5bin/posix/egrep +accept the following options: +.TP +.B \-b +Each line is preceded by the block number on which it was found. +This is sometimes useful +in locating disk block numbers by context. +Block numbers start with 0. +.TP +.B \-c +Only a count of matching lines is printed. +.TP +.BI \-e\ pattern_list +Specifies one or more patterns, separated by newline characters. +A line is selected if one or more of the specified patterns are found. +.TP +.BI \-f\ pattern_file +One or more patterns, separated by newline +characters, are read from +.I pattern_file. +If multiple +.B \-e +or +.B \-f +options are supplied to +.BR /usr/5bin/posix/egrep , +all of the pattern lists will be evaluated. +.TP +.B \-h +Normally, the name of each input file is printed before a match +if there is more that one input file. +When this option is present, no file names are printed. +.TP +.B \-i +Upper- and lowercase differences are ignored when searching matches. +.TP +.B \-l +The names of files with matching lines are listed +(once) separated by newlines. +.TP +.B \-n +Each line is preceded by its line number in the file. +Line numbers start with 1. +.TP +.B \-v +All lines but those matching are printed. +.PP +The following options are supported by +.B /usr/5bin/posix/egrep +only: +.TP +.B \-q +Do not write anything to standard output. +.TP +.B \-s +Error messages for nonexistent or unreadable files are suppressed. +.TP +.B \-x +Consider only lines consisting of the pattern as a whole, +like a regular expression surrounded by +.I ^ +and +.I $. +.PP +The following options are supported as extensions: +.TP +.B \-r +With this option given, +.I egrep +does not directly search in each given file that is a directory, +but descends it recursively +and scans each regular file found below it. +Device files are ignored. +Symbolic links are followed. 
+.TP +.B \-R +Operates recursively as with the +.I \-r +option, +but does not follow symbolic links that point to directories +unless if they are explicitly specified as arguments. +.TP +.B \-z +If an input file is found to be compressed with +.IR compress (1), +.IR gzip (1), +or +.IR bzip2 (1), +the appropriate compression program is started, +and +.I egrep +searches for the pattern in its output. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_COLLATE +Affects the collation order for range expressions, +equivalence classes, and collation symbols +in extended regular expressions. +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +in both full and extended regular expressions, +the availability and composition of character classes +in extended regular expressions, +and the case mapping for the +.B \-i +option. +.SH "SEE ALSO" +ed(1), +fgrep(1), +grep(1), +sed(1), +locale(7) +.SH DIAGNOSTICS +Exit status is 0 if any matches are found, +1 if none, 2 for syntax errors or inaccessible files. +.SH NOTES +If a line contains a +.SM NUL +character, +only matches up to this character are found with +.BR /usr/5bin/posix/egrep . +The entire matching line will be printed. +.PP +The LC_COLLATE variable has currently no effect. +Ranges in bracket expressions are ordered +as byte values in single-byte locales +and as wide character values in multibyte locales; +equivalence classes match the given character only, +and multi-character collating elements are not available. +.PP +For portable programs, restrict textual data +to the US-ASCII character set, +set the LC_CTYPE and LC_COLLATE variables to `C' or `POSIX', +and use the constructs in the second column +instead of the character class expressions as follows: +.RS +.sp +.TS +l l. 
+[[:alnum:]] [0\-9A\-Za\-z] +[[:alpha:]] [A\-Za\-z] +[[:blank:]] [\fI<tab><space>\fR] +[[:cntrl:]] [^\fI<space>\fR\-~] +[[:digit:]] [0\-9] +[[:graph:]] [!\-~] +[[:lower:]] [a\-z] +[[:print:]] [\fI<space>\fR\-~] +[[:punct:]] [!\-/:\-@[\-`{\-~] +[[:space:]] [\fI<tab><vt><ff><cr><space>\fR] +[[:upper:]] [A\-Z] +[[:xdigit:]] [0\-9a\-fA\-F] +.TE +.sp +.RE +.IR <tab> , +.IR <space> , +.IR <vt> , +.IR <ff> , +and +.I <cr> +indicate inclusion of +a literal tabulator, space, vertical tabulator, formfeed, +or carriage return character, respectively. +Do not put the +.IR <vt> , +.IR <ff> , +and +.I <cr> +characters into the range expression for the +.I space +class unless you actually want to match these characters. +.PP +Interval expressions were newly introduced +with extended regular expressions +and cannot be used in portable programs. +To put a literal +.RB ` { ' +character into an expression, +use +.IR [{] . diff --git a/grep/fgrep.1 b/grep/fgrep.1 @@ -0,0 +1,179 @@ +.\" +.\" Sccsid @(#)fgrep.1 1.24 (gritter) 1/24/05 +.\" Parts taken from grep(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. 
nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH FGREP 1 "1/24/05" "Heirloom Toolchest" "User Commands" +.SH NAME +fgrep \- search a file for a character string +.SH SYNOPSIS +.HP +.ad l +.nh +\fB/usr/5bin/fgrep\fR [\fB\-e\fI\ string_list\fR\ ...] +[\fB\-f\fI\ string_file\fR] [\fB\-bchilnrRvxz\fR] +[\fIstring_list\fR] [\fIfile\fR\ ...] +.HP +.ad l +.PD 0 +\fB/usr/5bin/posix/fgrep\fR \fB\-e\fI\ string_list\fR\ ... +[\fB\-f\fI\ string_file\fR] [\fB\-c\fR|\fB\-l\fR] +[\fB\-bhinrRvxz\fR] [\fIfile\fR\ ...] +.HP +.ad l +\fB/usr/5bin/posix/fgrep\fR \fB\-f\fI\ string_file\fR +[\fB\-e\fI\ string_list\fR\ ...] [\fB\-c\fR|\fB\-l\fR] +[\fB\-bhinrRvxz\fR] [\fIfile\fR\ ...] +.HP +.ad l +\fB/usr/5bin/posix/fgrep\fR [\fB\-c\fR|\fB\-l\fR] [\fB\-bhinrRvxz\fR] +\fIstring_list\fR [\fIfile\fR\ ...] +.br +.PD +.ad b +.hy 1 +.SH DESCRIPTION +The +.B fgrep +command searches the lines of the specified files +(or of standard input) +for occurrences of any of the newline separated strings in +.I string_list. 
+The default behavior is to print each matching line to standard output. +.PP +Both +.B /usr/5bin/fgrep +and +.B /usr/5bin/posix/fgrep +accept the following options: +.TP +.B \-b +Each line is preceded by the block number on which it was found. +This is sometimes useful +in locating disk block numbers by context. +Block numbers start with 0. +.TP +.B \-c +Only a count of matching lines is printed. +.TP +.BI \-e\ string_list +Specifies one or more strings, separated by newline characters. +A line is selected if one or more of the specified strings are found. +.TP +.BI \-f\ string_file +One or more strings, separated by newline +characters, are read from +.I string_file. +If multiple +.B \-e +or +.B \-f +options are supplied to +.BR /usr/5bin/posix/fgrep , +all of the pattern lists will be evaluated. +.TP +.B \-h +Normally, the name of each input file is printed before a match +if there is more that one input file. +When this option is present, no file names are printed. +.TP +.B \-i +Upper- and lowercase differences are ignored when searching matches. +.TP +.B \-l +The names of files with matching lines are listed +(once) separated by newlines. +.TP +.B \-n +Each line is preceded by its line number in the file. +Line numbers start with 1. +.TP +.B \-v +All lines but those matching are printed. +.TP +.B \-x +(Exact) only lines matched in their entirety are printed. +.PP +The following options are supported as extensions: +.TP +.B \-r +With this option given, +.I fgrep +does not directly search in each given file that is a directory, +but descends it recursively +and scans each regular file found below it. +Device files are ignored. +Symbolic links are followed. +.TP +.B \-R +Operates recursively as with the +.I \-r +option, +but does not follow symbolic links that point to directories +unless if they are explicitly specified as arguments. 
+.TP +.B \-z +If an input file is found to be compressed with +.IR compress (1), +.IR gzip (1), +or +.IR bzip2 (1), +the appropriate compression program is started, +and +.I fgrep +searches for the pattern in its output. +.PP +.B /usr/5bin/s42/fgrep +and +.B /usr/5bin/posix2001/fgrep +are identical to +.BR /usr/5bin/posix/fgrep . +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +and the case mapping for the +.B \-i +option. +.SH "SEE ALSO" +ed(1), +egrep(1), +grep(1), +sed(1), +locale(7) +.SH DIAGNOSTICS +Exit status is 0 if any matches are found, +1 if none, 2 for syntax errors or inaccessible files. diff --git a/grep/grep.1 b/grep/grep.1 @@ -0,0 +1,297 @@ +'\" t +.\" Sccsid @(#)grep.1 1.36 (gritter) 8/14/05 +.\" Parts taken from grep(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. 
+.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH GREP 1 "8/14/05" "Heirloom Toolchest" "User Commands" +.SH NAME +grep \- search a file for a pattern +.SH SYNOPSIS +.HP +.ad l +.nh +\fB/usr/5bin/grep\fR [\fB\-bchilnrRsvwz\fR] +\fIpattern\fR [\fIfile\fR\ ...] +.HP +.PD 0 +.ad l +\fB/usr/5bin/posix/grep\fR [\fB\-E\fR|\fB\-F\fR] +\fB\-e\fI\ pattern_list\fR\ ... +[\fB\-f\fI\ pattern_file\fR] [\fB\-c\fR|\fB\-l\fR|\fB\-q\fR] +[\fB\-bhinrRsvwxz\fR] [\fIfile\fR\ ...] +.HP +.ad l +\fB/usr/5bin/posix/grep\fR [\fB\-E\fR|\fB\-F\fR] +\fB\-f\fI\ pattern_file\fR +[\fB\-e\fI\ pattern_list\fR\ ...] [\fB\-c\fR|\fB\-l\fR|\fB\-q\fR] +[\fB\-bhinrRsvwxz\fR] [\fIfile\fR\ ...] +.HP +.ad l +\fB/usr/5bin/posix/grep\fR [\fB\-E\fR|\fB\-F\fR] +[\fB\-c\fR|\fB\-l\fR|\fB\-q\fR] [\fB\-bhinrRsvwxz\fR] +\fIpattern_list\fR [\fIfile\fR\ ...] +.br +.PD +.ad b +.hy 1 +.SH DESCRIPTION +The +.B grep +command searches the lines of the specified files +(or of standard input) +for occurrences of the regular expression +.I pattern. +The default behavior is to print each matching line to standard output. 
+.PP +The +.B /usr/5bin/grep +command accepts one pattern +that is treated as a simple regular expression; +it uses a compact nondeterministic algorithm. +.PP +The +.B /usr/5bin/posix/grep +command uses basic regular expressions by default +and accepts a newline-separated list of patterns +as described for the +.B \-e +option below. +It uses a deterministic algorithm with moderate space requirements +for most expressions; +backreferences, word delimiters, and multi-character collating elements +cause a nondeterministic algorithm to be used. +.PP +.B /usr/5bin/s42/grep +and +.B /usr/5bin/posix2001/grep +are identical to +.BR /usr/5bin/posix/grep . +.PP +See the description of +.IR ed (1) +for the specifications of simple and basic regular expressions. +.PP +Care should be taken when using the characters +$ * [ ^ | ? \' " ( ) and \e in the expression +as they are also meaningful to the Shell. +It is safest to enclose the entire expression +argument in single quotes \' \'. +.PP +Both +.B /usr/5bin/grep +and +.B /usr/5bin/posix/grep +accept the following options: +.TP +.B \-b +Each line is preceded by the block number on which it was found. +This is sometimes useful +in locating disk block numbers by context. +Block numbers start with 0. +.TP +.B \-c +Only a count of matching lines is printed. +.TP +.B \-h +Normally, the name of each input file is printed before a match +if there is more than one input file. +When this option is present, no file names are printed. +.TP +.B \-i +Upper- and lowercase differences are ignored when searching matches. +.TP +.B \-l +The names of files with matching lines are listed +(once) separated by newlines. +.TP +.B \-n +Each line is preceded by its line number in the file. +Line numbers start with 1. +.TP +.B \-s +Error messages for nonexistent or unreadable files are suppressed. +.TP +.B \-v +All lines but those matching are printed. 
+.PP +The following options are supported by +.B /usr/5bin/posix/grep +only: +.TP +.BI \-e\ pattern_list +Specifies one or more patterns, separated by newline characters. +A line is selected if one or more of the specified patterns are found. +.TP +.B \-E +All patterns are interpreted as extended regular expressions +as described in +.IR egrep (1). +.TP +.BI \-f\ pattern_file +One or more patterns, separated by newline +characters, are read from +.I pattern_file. +.TP +.B \-F +All patterns are interpreted as fixed strings, +as with +.IR fgrep (1). +.TP +.B \-q +Do not write anything to standard output. +.TP +.B \-x +Consider only lines consisting of the pattern as a whole, +like a regular expression surrounded by +.I ^ +and +.I $. +.PP +The following options are supported as extensions: +.TP +.B \-r +With this option given, +.I grep +does not directly search in each given file that is a directory, +but descends it recursively +and scans each regular file found below it. +Device files are ignored. +Symbolic links are followed. +.TP +.B \-R +Operates recursively as with the +.I \-r +option, +but does not follow symbolic links that point to directories +unless they are explicitly specified as arguments. +.TP +.B \-w +Searches for the patterns treated as words, +as if they were surrounded by `\e<\ \e>'. +Only available if neither the +.I \-E +nor the +.I \-F +option are also supplied. +.TP +.B \-z +If an input file is found to be compressed with +.IR compress (1), +.IR gzip (1), +or +.IR bzip2 (1), +the appropriate compression program is started, +and +.I grep +searches for the pattern in its output. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_COLLATE +Affects the collation order for range expressions, +equivalence classes, and collation symbols +in basic regular expressions. 
+.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +in both simple and basic regular expressions, +the availability and composition of character classes +in basic regular expressions, +and the case mapping for the +.B \-i +option. +.SH "SEE ALSO" +ed(1), +egrep(1), +fgrep(1), +sed(1), +locale(7) +.SH DIAGNOSTICS +Exit status is 0 if any matches are found, +1 if none, 2 for syntax errors or inaccessible files. +.SH NOTES +If a line contains a +.SM NUL +character, +only matches up to this character are found +(unless +.B /usr/5bin/posix/grep +is used with the +.I \-F +option). +The entire matching line will be printed. +.PP +The LC_COLLATE variable has currently no effect. +Ranges in bracket expressions are ordered +as byte values in single-byte locales +and as wide character values in multibyte locales; +equivalence classes match the given character only, +and multi-character collating elements are not available. +.PP +The options supported by +.B /usr/5bin/posix/grep +that are not accepted by +.B /usr/5bin/grep +can easily be replaced by portable constructs: +Use +.I egrep +instead of +.BR \-E , +.I fgrep +instead of +.BR \-F . +Use +.I egrep +if you need the +.B \-e +or +.B \-f +option, +use only one of them and that only once; +if necessary, use text processing tools +to generate a single expression list before. +Redirect standard output to +.I /dev/null +for +.B \-q +(the possible speedup with +.I \-q +is never worth human time spent with porting scripts), +and use the +.RI ` ^ ' +and +.RI ` $ ' +meta-characters instead of +.BR \-x . diff --git a/grep/grep.c b/grep/grep.c @@ -0,0 +1,727 @@ +/* + * grep - search a file for a pattern + * + * Gunnar Ritter, Freiburg i. Br., Germany, April 2001. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* Sccsid @(#)grep.c 1.53 (gritter) 12/27/06> */ + +/* + * Code common to all grep flavors. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/wait.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <libgen.h> +#include <locale.h> +#include <limits.h> +#include <ctype.h> +#include <dirent.h> +#include <errno.h> + +#include "grep.h" +#include "alloc.h" + +/* + * Generic flags and the like. 
+ */ +int Eflag; /* use EREs */ +int Fflag; /* use fixed strings */ +int bflag; /* print buffer count */ +int cflag; /* print count only */ +int fflag; /* had pattern file argument */ +int hflag; /* do not print filenames */ +int iflag; /* ignore case */ +int lflag; /* print filenames only */ +int nflag; /* print line numbers */ +int qflag; /* no output at all */ +int (*rflag)(const char *, struct stat *); /* operate recursively */ +int sflag; /* avoid error messages */ +int vflag; /* inverse selection */ +int wflag; /* search for words */ +int xflag; /* match entire line */ +int zflag; /* decompress compressed files */ +int mb_cur_max; /* avoid multiple calls to MB_CUR_MAX */ +unsigned status = 1; /* exit status */ +off_t lmatch; /* count of line matches */ +off_t lineno; /* current line number */ +char *progname; /* argv[0] to main() */ +char *filename; /* name of current file */ +char *options; /* for getopt() */ +void (*build)(void); /* compile function */ +int (*match)(const char *, size_t); /* comparison function */ +int (*range)(struct iblok *, char *); /* grep range of lines */ + +/* + * Regexp variables. + */ +struct expr *e0; /* start of expression list */ +enum matchflags matchflags; /* matcher flags */ + +/* + * To avoid link loops with -r. + */ +static struct visit { + ino_t v_ino; + dev_t v_dev; +} *visited; +static int vismax; /* number of members in visited */ + +/* + * Lower-case a character string. 
+ */
+/*
+ * loconv() copies sz bytes from src to dst, converting every character
+ * to lower case, and returns the number of bytes stored in dst. A
+ * converted character is only used if it does not need more bytes than
+ * the original, so dst never needs more than sz bytes. No terminating
+ * NUL is written.
+ */
+size_t
+loconv(register char *dst, register char *src, size_t sz)
+{
+	char	*odst = dst;
+
+	if (mbcode) {
+		char	mb[MB_LEN_MAX];
+		wchar_t	wc;
+		int	len, nlen;
+
+		while (sz > 0) {
+			if ((*src & 0200) == 0) {
+				/* High bit clear: handled as a single byte. */
+				*dst++ = tolower((unsigned char)*src);
+				src++;
+				sz--;
+				continue;
+			}
+			len = mbtowc(&wc, src, sz);
+			if (len <= 0 || (size_t)len > sz) {
+				/* Not decodable: pass one byte through as is. */
+				*dst++ = *src++;
+				sz--;
+				continue;
+			}
+			/*
+			 * Encode into a scratch buffer first. wctomb()
+			 * returns -1 if the lower-case character cannot
+			 * be represented; that must neither move dst
+			 * backwards nor drop the input character.
+			 */
+			nlen = wctomb(mb, towlower(wc));
+			if (nlen > 0 && nlen <= len) {
+				memcpy(dst, mb, nlen);
+				dst += nlen;
+				src += len;
+				sz -= len;
+			} else {
+				*dst++ = *src++;
+				sz--;
+			}
+		}
+	} else {
+		while (sz--) {
+			*dst++ = tolower(*src & 0377);
+			src++;
+		}
+	}
+	return dst - odst;
+}
+
+/*
+ * Determine if pat ends with an unescaped dollar sign, i.e. a '$' that
+ * is preceded by an even number of backslashes. Returns 1 if so, 0 else.
+ */
+static int
+termdollar(const char *pat, long len)
+{
+	long	i;
+	int	dollar = 1;
+
+	if (len <= 0 || pat[len - 1] != '$')
+		return 0;
+	/*
+	 * Every backslash directly in front of the '$' toggles its
+	 * meaning. Indexing (instead of stepping a pointer) never forms
+	 * an address in front of pat[0].
+	 */
+	for (i = len - 2; i >= 0 && pat[i] == '\\'; i--)
+		dollar = !dollar;
+	return dollar;
+}
+
+/*
+ * Surround the pattern with \< \>.
+ */
+/*
+ * wcomp() replaces *pat by a newly allocated copy that has \< inserted
+ * at the front (after a leading ^) and \> appended at the end (before
+ * an unescaped trailing $). *len grows by 4. The old pattern is not
+ * freed.
+ */
+void
+wcomp(char **pat, long *len)
+{
+	/* 2 bytes each for \< and \>, 1 byte for the terminating NUL */
+	char	*wp = smalloc(*len + 5);
+
+	memcpy(&wp[2], *pat, *len);
+	if ((*pat)[0] == '^')
+		/* overwrites the copied '^' at wp[2] with '<' */
+		memcpy(wp, "^\\<", 3);
+	else
+		memcpy(wp, "\\<", 2);
+	if (termdollar(*pat, *len))
+		/* overwrites the copied '$' and puts it back after \> */
+		strcpy(&wp[*len-1+2], "\\>$");
+	else
+		strcpy(&wp[*len+2], "\\>");
+	*len += 4;
+	*pat = wp;
+}
+
+/*
+ * Helper for redirect(), run in a process of its own: write what is
+ * left in the buffer of ip, followed by all further input, to fd.
+ * Never returns.
+ */
+static void
+feedpipe(struct iblok *ip, int fd)
+{
+	ssize_t	rd, wo, wt;
+
+	for (;;) {
+		rd = ip->ib_end - ip->ib_cur;
+		wt = 0;
+		while (wt < rd) {
+			wo = write(fd, &ip->ib_cur[wt], rd - wt);
+			if (wo <= 0) {
+				/* errno is only meaningful if write() failed */
+				if (wo < 0 && errno == EINTR)
+					continue;
+				_exit(0);
+			}
+			wt += wo;
+		}
+		if (ib_read(ip) == EOF)
+			break;
+		ip->ib_cur--;
+	}
+	_exit(0);
+}
+
+/*
+ * Start the program arg0 with the single argument arg1 (which may be
+ * NULL) so that it reads the input of ip, beginning at ip->ib_cur, and
+ * writes to a pipe. Returns a new input block for the read end of this
+ * pipe with ib_pid set to the child, or NULL if no pipe or process
+ * could be created. ip itself is left alone in the calling process.
+ */
+static struct iblok *
+redirect(struct iblok *ip, const char *arg0, const char *arg1)
+{
+	struct iblok	*nip;
+	int	pd[2];
+	pid_t	pid;
+
+	if (pipe(pd) < 0)
+		return NULL;
+	switch (pid = fork()) {
+	case 0:
+		/*
+		 * Try to push the bytes already buffered back by seeking.
+		 * If the descriptor cannot seek, a second process feeds
+		 * the buffered bytes and the remaining input through
+		 * another pipe.
+		 */
+		if (lseek(ip->ib_fd, -(ip->ib_end - ip->ib_cur),
+					SEEK_CUR) == (off_t)-1) {
+			int	xpd[2];
+
+			if (pipe(xpd) < 0) {
+				fprintf(stderr, "%s: cannot create pipe\n",
+						progname);
+				_exit(0177);
+			}
+			if (fork() == 0) {
+				/*
+				 * The feeder must not keep the result
+				 * pipe open, or the reader never gets
+				 * end-of-file resp. the writer SIGPIPE.
+				 */
+				close(pd[0]);
+				close(pd[1]);
+				close(xpd[0]);
+				feedpipe(ip, xpd[1]);
+				/*NOTREACHED*/
+			}
+			/* also reached if fork() failed: input is empty then */
+			close(xpd[1]);
+			dup2(xpd[0], 0);
+			close(xpd[0]);
+		} else {
+			if (ip->ib_fd)
+				dup2(ip->ib_fd, 0);
+		}
+		if (ip->ib_fd)
+			ib_close(ip);
+		else
+			ib_free(ip);
+		dup2(pd[1], 1);
+		close(pd[0]);
+		close(pd[1]);
+		/* the list terminator of a variadic call must be a pointer */
+		execlp(arg0, arg0, arg1, (char *)NULL);
+		fprintf(stderr, "%s: could not exec %s\n", progname, arg0);
+		_exit(0177);
+		/*NOTREACHED*/
+	case -1:
+		fprintf(stderr, "%s: cannot fork()\n", progname);
+		close(pd[0]);
+		close(pd[1]);
+		status = 2;
+		return NULL;
+	default:
+		close(pd[1]);
+		nip = ib_alloc(pd[0], 0);
+		nip->ib_pid = pid;
+		return nip;
+	}
+}
+
+/*
+ * Report a matching line.
+ */ +void +report(const char *line, size_t llen, off_t bcnt, int addnl) +{ + if (filename && !hflag) + printf("%s:", filename); +#ifdef LONGLONG + if (bflag) + printf("%llu:", (long long)bcnt); + if (nflag) + printf("%llu:", (long long)lineno); +#else /* !LONGLONG */ + if (bflag) + printf("%lu:", (long)bcnt); + if (nflag) + printf("%lu:", (long)lineno); +#endif /* !LONGLONG */ + if (line && llen) + fwrite(line, sizeof *line, llen, stdout); + if (addnl) + putchar('\n'); +} + +/* + * Check line for match. If necessary, the line gets NUL-terminated (so + * its address range must be writable then). When ignoring character case, + * a lower-case-only copy of the line is made instead. If a match is found, + * statistics are printed. Returns 1 if main loop shall terminate, 0 else. + */ +static int +matchline(char *line, size_t sz, int putnl, struct iblok *ip) +{ + size_t csz = sz; + int terminate = 0; + char lbuf[512], *abuf = NULL, *cline = line; + + if (iflag && (matchflags & MF_LOCONV)) { + if (sz >= sizeof lbuf - 1) { + abuf = smalloc(sz + 1); + cline = abuf; + } else + cline = lbuf; + csz = loconv(cline, line, sz); + cline[csz] = '\0'; + } else if (matchflags & MF_NULTERM) + cline[sz] = '\0'; + lineno++; + if (match(cline, csz) ^ vflag) { + lmatch++; + if (qflag == 0) { + if (status == 1) + status = 0; + if (lflag) { + puts(filename ? filename : stdinmsg); + } else if (!cflag) + report(line, sz, (ib_offs(ip)-1) / BSZ, putnl); + } else + exit(0); + if (qflag || lflag) + terminate = 1; + } + if (abuf) + free(abuf); + return terminate; +} + +/* + * Check all lines within ip->ib_cur and last which contains the last + * newline. If the main loop shall terminate, 1 is returned. 
+ */ +static int +gn_range(struct iblok *ip, char *last) +{ + char *nl; + + while ((nl = memchr(ip->ib_cur, '\n', last + 1 - ip->ib_cur)) != NULL) { + if (matchline(ip->ib_cur, nl - ip->ib_cur, 1, ip)) + return 1; + if (nl == last) + return 0; + ip->ib_cur = nl + 1; + } + return 0; +} + +/* + * Main grep routine. The line buffer herein is only used for overlaps + * between file buffer fills. + */ +static struct iblok * +grep(struct iblok *ip) +{ + char *line = NULL; /* line buffer */ + register char *lastnl; /* last newline in file buffer */ + size_t sz = 0; /* length of line in line buffer */ + char *cp; + int hadnl; /* lastnl points to newline char */ + int oom = 0; /* got out of memory */ + + lineno = lmatch = 0; + if (ib_read(ip) == EOF) + goto endgrep; + ip->ib_cur--; + if (zflag) { + struct iblok *np; + for (;;) { + sz = ip->ib_end - ip->ib_cur; + if (sz > 3 && memcmp(ip->ib_cur, "BZh", 3) == 0) + np = redirect(ip, "bzip2", "-cd"); + else if (sz > 2 && + memcmp(ip->ib_cur, "\37\235", 2) == 0) + np = redirect(ip, "zcat", NULL); + else if (sz > 2 && + memcmp(ip->ib_cur, "\37\213", 2) == 0) + np = redirect(ip, "gzip", "-cd"); + else + break; + if (np == NULL) + break; + if (ip->ib_fd) + ib_close(ip); + else + ib_free(ip); + ip = np; + if (ib_read(ip) == EOF) + goto endgrep; + ip->ib_cur--; + } + } + for (;;) { + for (lastnl = ip->ib_end - 1; + *lastnl != '\n' && lastnl > ip->ib_cur; + lastnl--); + if (hadnl = (ip->ib_cur < ip->ib_end && *lastnl == '\n')) + if (range(ip, lastnl)) + break; + if (lastnl < ip->ib_end - hadnl) { + /* + * Copy the partial line from file buffer to line + * buffer. Allocate enough space to zero-terminate + * the line later if necessary. 
+ */ + sz = ip->ib_end - lastnl - hadnl; + line = smalloc(sz + 1); + memcpy(line, lastnl + hadnl, sz); + ip->ib_cur = lastnl + hadnl; + } else + line = NULL; +nextbuf: + if (ib_read(ip) == EOF) { + if (line) { + matchline(line, sz, sus, ip); + free(line); + line = NULL; + sz = 0; + } + break; + } + ip->ib_cur--; + if (line) { + /* + * Append the partial line at the beginning of the + * file buffer to the line buffer. + */ + size_t oldsz = sz; + if ((cp = memchr(ip->ib_cur, '\n', + ip->ib_end - ip->ib_cur)) == NULL) { + char *nline; + /* + * Ugh. This is really a huge line. Store the + * entire file buffer in the line buffer and + * read the next part of the file. + */ + sz += ip->ib_end - ip->ib_cur; + if ((nline = realloc(line, sz + 1)) == NULL) { + sz = oldsz; + cp = &ip->ib_end[-1]; + oom++; + } else { + line = nline; + memcpy(line + oldsz, ip->ib_cur, + ip->ib_end - ip->ib_cur); + goto nextbuf; + } + } + if ((sz = cp - ip->ib_cur) > 0) { + char *nline; + sz += oldsz; + if ((nline = realloc(line, sz + 1)) == NULL) { + sz = oldsz; + oom++; + } else { + line = nline; + memcpy(line + oldsz, ip->ib_cur, + cp - ip->ib_cur); + } + } else + sz = oldsz; + if (matchline(line, sz, 1, ip)) + break; + free(line); + line = NULL; + sz = 0; + ip->ib_cur = cp + (oom == 0); + oom = 0; + } + } +endgrep: + if (!qflag && cflag) { + if (filename && !hflag) + printf("%s:", filename); +#ifdef LONGLONG + printf("%llu\n", (long long)lmatch); +#else + printf("%lu\n", (long)lmatch); +#endif + } + return ip; +} + +/* + * Grep a named file. + */ +static void +fngrep(const char *fn, int level) +{ + struct iblok *ip; + struct stat st; + int i; + + if (rflag && fn && (level ? 
rflag : stat)(fn, &st) == 0) { + if (rflag != lstat) { + for (i = 0; i < level; i++) + if (st.st_dev == visited[i].v_dev && + st.st_ino == visited[i].v_ino) + return; + if (level >= vismax) { + vismax += 20; + visited = srealloc(visited, sizeof *visited * + vismax); + } + visited[level].v_dev = st.st_dev; + visited[level].v_ino = st.st_ino; + } + mode: switch (st.st_mode&S_IFMT) { +#define ignoring(t, s) fprintf(stderr, "%s: ignoring %s %s\n", progname, t, s) + case S_IFIFO: + ignoring("named pipe", fn); + return; + case S_IFBLK: + ignoring("block device", fn); + return; + case S_IFCHR: + ignoring("block device", fn); + return; +#ifdef S_IFSOCK + case S_IFSOCK: + ignoring("socket", fn); + return; +#endif /* S_IFSOCK */ + case S_IFLNK: + if (stat(fn, &st) < 0 || (st.st_mode&S_IFMT) == S_IFDIR) + return; + goto mode; + default: + break; + case S_IFDIR: { + char *path; + int pend, psize, i; + DIR *df; + struct dirent *dp; + + if (hflag == 2) + hflag = 0; + if ((df = opendir(fn)) == NULL) { + if (sflag == 0) + fprintf(stderr, "%s: can't open " + "directory %s\n", + progname, fn); + if (!qflag || status == 1) + status = 2; + return; + } + pend = strlen(fn); + path = malloc(psize = pend + 2); + strcpy(path, fn); + path[pend++] = '/'; + while ((dp = readdir(df)) != NULL) { + if (dp->d_name[0] == '.' && + (dp->d_name[1] == '\0' || + dp->d_name[1] == '.' 
&& + dp->d_name[2] == '\0')) + continue; + i = 0; + do { + if (pend + i >= psize) + path = srealloc(path, + psize += 14); + path[pend+i] = dp->d_name[i]; + } while (dp->d_name[i++]); + filename = path; + fngrep(path, level+1); + } + free(path); + closedir(df); + return; + } + } + } + if (fn) { + if ((ip = ib_open(fn, 0)) == NULL) { + if (sflag == 0) + fprintf(stderr, "%s: can't open %s\n", + progname, fn); + if (!qflag || status == 1) + status = 2; + return; + } + } else + ip = ib_alloc(0, 0); + ip = grep(ip); + if (ip->ib_fd) { + ib_close(ip); + if (zflag && ip->ib_pid) { + int s; + waitpid(ip->ib_pid, &s, 0); + if (s) + status = 2; + } + } else + ib_free(ip); +} + +int +main(int argc, char **argv) +{ + int i, hadpat = 0; + +#ifdef __GLIBC__ + putenv("POSIXLY_CORRECT=1"); +#endif + progname = basename(argv[0]); + setlocale(LC_COLLATE, ""); + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + range = gn_range; + init(); + while ((i = getopt(argc, argv, options)) != EOF) { + switch (i) { + case 'E': + Eflag |= 1; + rc_select(); + break; + case 'F': + if (Eflag&2) + Eflag = 0; + Fflag |= 1; + ac_select(); + break; + case 'b': + bflag = 1; + break; + case 'c': + cflag = 1; + break; + case 'e': + patstring(optarg); + hadpat++; + break; + case 'f': + fflag++; + patfile(optarg); + hadpat++; + break; + case 'h': + hflag = 1; + break; + case 'i': + case 'y': + iflag = 1; + break; + case 'l': + lflag = 1; + break; + case 'n': + nflag = 1; + break; + case 'q': + qflag = 1; + break; + case 'r': + rflag = stat; + break; + case 'R': + rflag = lstat; + break; + case 's': + sflag = 1; + break; + case 'v': + vflag = 1; + break; + case 'w': + wflag = 1; + break; + case 'x': + xflag = 1; + break; + case 'z': + zflag = 1; + break; + default: + if (!(Fflag&2)) + usage(); + status = 2; + } + } + if (sus) { + if (Fflag == 2) { + if (sflag) { + optind = 1; + argv[1] = "-s"; + getopt(argc, argv, ""); + usage(); + } + if (qflag) { + optind = 1; + argv[1] = "-q"; + getopt(argc, argv, 
""); + usage(); + } + } + if (Fflag && status == 2) + usage(); + if (Eflag == 1 && Fflag == 1 || cflag + lflag + qflag > 1) + usage(); + if (wflag && (Eflag || Fflag)) + usage(); + } + if (cflag) + lflag = 0; + if (hadpat == 0) { + if (optind >= argc) + misop(); + patstring(argv[optind++]); + } else if (e0 == NULL) + patstring(NULL); + build(); + if (optind != argc) { + if (optind + 1 == argc) + hflag = 2; + do { + if (sus && argv[optind][0] == '-' && + argv[optind][1] == '\0') { + filename = NULL; + fngrep(NULL, 0); + } else { + filename = argv[optind]; + fngrep(argv[optind], 0); + } + } while (++optind < argc); + } else { + if (lflag && !sus && (Eflag || Fflag)) + exit(1); + fngrep(NULL, 0); + } + return status; +} diff --git a/grep/grep.h b/grep/grep.h @@ -0,0 +1,146 @@ +/* + * grep - search a file for a pattern + * + * Gunnar Ritter, Freiburg i. Br., Germany, April 2001. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +/* Sccsid @(#)grep.h 1.23 (gritter) 1/4/05> */ + +#include <sys/types.h> +#include <regex.h> + +#include "iblok.h" + +#include "config.h" + +#define BSZ 512 /* block size */ + +/* + * Expression flags. + */ +enum eflags { + E_NONE = 0, /* no flags set */ + E_NL = 1, /* pattern ends with newline */ + E_NULL = 2 /* no pattern, not even an empty one */ +}; + +/* + * List of search expressions; not used for compile() matching. + */ +struct expr { + struct expr *e_nxt; /* next item in list */ + char *e_pat; /* search pattern */ + regex_t *e_exp; /* compiled pattern from regcomp() */ + long e_len; /* pattern length */ + enum eflags e_flg; /* expression flags */ +}; + +/* + * Matcher flags. + */ +enum matchflags { + MF_NULTERM = 01, /* search string must be \0 terminated*/ + MF_LOCONV = 02 /* lower-case search string if -i is set */ +}; + +/* + * Variables in grep.c. + */ +extern int Eflag; /* use EREs */ +extern int Fflag; /* use fixed strings */ +extern int bflag; /* print buffer count */ +extern int cflag; /* print count only */ +extern int fflag; /* had pattern file argument */ +extern int hflag; /* do not print filenames */ +extern int iflag; /* ignore case */ +extern int lflag; /* print filenames only */ +extern int nflag; /* print line numbers */ +extern int qflag; /* no output at all */ +extern int sflag; /* avoid error messages */ +extern int vflag; /* inverse selection */ +extern int wflag; /* search for words */ +extern int xflag; /* match entire line */ +extern int mb_cur_max; /* MB_CUR_MAX */ +#define mbcode (mb_cur_max>1) /* multibyte characters in use */ +extern unsigned status; /* exit status */ +extern off_t lmatch; /* count of matching lines */ +extern off_t lineno; /* current line number */ +extern char *progname; /* argv[0] to main() */ +extern char *filename; /* name of current file */ +extern void (*build)(void); /* compile function */ +extern int (*match)(const char *, size_t); /* comparison */ +extern int (*range)(struct iblok *, char *); 
/* grep range */ +extern struct expr *e0; /* start of expression list */ +extern enum matchflags matchflags; /* matcher flags */ + +/* + * These differ amongst grep flavors. + */ +extern int sus; /* POSIX.2 command version in use */ +extern char *stdinmsg; /* name for standard input */ +extern char *usagemsg; /* usage string */ +extern char *options; /* for getopt() */ + +/* + * In grep.c. + */ +extern size_t loconv(char *, char *, size_t); +extern void wcomp(char **, long *); +extern void report(const char *, size_t, off_t, int); + +/* + * Flavor dependent. + */ +extern void usage(void); +extern void misop(void); +extern void rc_error(struct expr *, int); +extern void init(void); + +/* + * Traditional egrep only. + */ +extern void eg_select(void); + +/* + * Fgrep only. + */ +extern void ac_select(void); + +/* + * compile()/step()-related. + */ +extern void st_select(void); + +/* + * regcomp()/regexec()-related. + */ +extern void rc_select(void); + +/* + * Not for SVID3 grep. + */ +extern void patstring(char *); +extern void patfile(char *); +extern int nextch(void); +extern void outline(struct iblok *, char *, size_t); diff --git a/grep/grid.c b/grep/grid.c @@ -0,0 +1,50 @@ +/* + * grep - search a file for a pattern + * + * Gunnar Ritter, Freiburg i. Br., Germany, April 2001. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)grep.sl 2.51 (gritter) 12/27/06"; +/* SLIST */ +/* +ac.c:static const char sccsid[] USED = "@(#)fgrep.sl 2.10 (gritter) 5/29/05"; +alloc.c: Sccsid @(#)alloc.c 1.3 (gritter) 4/17/03> +alloc.h: Sccsid @(#)alloc.h 1.3 (gritter) 4/17/03> +egrep.y:static const char sccsid[] USED = "@(#)egrep.sl 2.22 (gritter) 5/29/05"; +fgrep.c: Sccsid @(#)fgrep.c 1.12 (gritter) 12/17/04> +ggrep.c: Sccsid @(#)ggrep.c 1.26 (gritter) 1/4/05> +grep.c: Sccsid @(#)grep.c 1.53 (gritter) 12/27/06> +grep.h: Sccsid @(#)grep.h 1.23 (gritter) 1/4/05> +plist.c: Sccsid @(#)plist.c 1.22 (gritter) 12/8/04> +rcomp.c: Sccsid @(#)rcomp.c 1.27 (gritter) 2/6/05> +sus.c: Sccsid @(#)sus.c 1.24 (gritter) 5/29/05> +svid3.c: Sccsid @(#)svid3.c 1.7 (gritter) 4/17/03> +*/ diff --git a/grep/mkfile b/grep/mkfile @@ -0,0 +1,49 @@ +BIN = grep +OBJ = alloc.o grep.o grid.o plist.o rcomp.o sus.o ac.o +LOCAL_CFLAGS = -DSU3 +INSTALL_BIN = grep +INSTALL_MAN1 = grep.1 egrep.1 fgrep.1 +INSTALL_SYMLINK = \ + grep /bin/egrep \ + grep /bin/fgrep +DEPS = libcommon + +<$mkbuild/mk.common + +grep: $OBJ + +config.h:Q: + echo '/* Auto-generated by make. Do not edit! */' >config.h + echo -n "Checking for wchar.h... " + echo '#include <wchar.h>' >___build$$$$.c + $CC $CFLAGS2 $CPPFLAGS $IWCHAR $ICOMMON $IUXRE $LARGEF -c ___build$$$$.c >/dev/null 2>&1 + if test $? 
= 0 && test -f ___build$$$$.o + then echo '#include <wchar.h>' >>config.h + else echo "not " + fi + rm -f ___build$$$$.o ___build$$$$.c + echo "found." + echo -n "Checking for wctype.h... " + echo '#include <wctype.h>' >___build$$$$.c + $CC $CFLAGS2 $CPPFLAGS $IWCHAR $ICOMMON $IUXRE $LARGEF -c ___build$$$$.c >/dev/null 2>&1 + if test $? = 0 && test -f ___build$$$$.o + then echo '#include <wctype.h>' >>config.h + else echo "not " + fi + rm -f ___build$$$$.o ___build$$$$.c + echo "found." + echo -n "Checking for long long... " + echo 'long long foo;' >___build$$$$.c + $CC $CFLAGS2 $CPPFLAGS $IWCHAR $ICOMMON $IUXRE $LARGEF -c ___build$$$$.c >/dev/null 2>&1 + if test $? = 0 && test -f ___build$$$$.o + then echo '#define LONGLONG' >>config.h + else echo "not " + fi + rm -f ___build$$$$.o ___build$$$$.c + echo "found." + +grep.o: grep.h config.h alloc.h +plist.o: grep.h config.h alloc.h +sus.o: grep.h config.h alloc.h +ac.o: alloc.h grep.h +rcomp.o: grep.h config.h alloc.h diff --git a/grep/plist.c b/grep/plist.c @@ -0,0 +1,213 @@ +/* + * grep - search a file for a pattern + * + * Gunnar Ritter, Freiburg i. Br., Germany, April 2001. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. 
This notice may not be removed or altered from any source distribution. + */ + +/* Sccsid @(#)plist.c 1.22 (gritter) 12/8/04> */ + +/* + * Pattern list routines. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <ctype.h> + +#include "grep.h" +#include "alloc.h" + +/* + * Add a pattern starting at the given node of the expression list. + */ +static void +addpat(struct expr **e, char *pat, long len, enum eflags flg) +{ + if (e0) { + (*e)->e_nxt = (struct expr *)smalloc(sizeof **e); + (*e) = (*e)->e_nxt; + } else + e0 = (*e) = (struct expr *)smalloc(sizeof **e); + if (wflag) + wcomp(&pat, &len); + (*e)->e_nxt = NULL; + (*e)->e_pat = pat; + (*e)->e_len = len; + (*e)->e_flg = flg; +} + +/* + * Read patterns from pattern string. In traditional command versions, -f + * overrides all -e and all previous -f options. In POSIX.2 command versions, + * all -e and -f options are cumulated. + */ +void +patstring(char *cp) +{ + struct expr *e = NULL; + char *ep; + int nl; + + if (e0) { + if (sus) + for (e = e0; e->e_nxt; e = e->e_nxt); + else if (fflag) + return; + else + e0 = NULL; + } + if (cp) { + do { + if ((nl = (ep = strchr(cp, '\n')) != NULL) != 0) + *ep = 0; + addpat(&e, cp, ep ? ep - cp : strlen(cp), nl); + cp = ep + 1; + if (nl) + *ep = '\n'; + } while (ep); + } else + addpat(&e, strdup(""), 0, E_NULL); +} + +/* + * Read patterns from file. 
+ */ +void +patfile(char *fn) +{ + struct stat st; + struct expr *e = NULL; + char *cp; + struct iblok *ip; + size_t sz, len; + int nl; + + if ((ip = ib_open(fn, 0)) == NULL || fstat(ip->ib_fd, &st) < 0) { + fprintf(stderr, "%s: can't open %s\n", progname, fn); + exit(2); + } + if (e0) { + if (sus) + for (e = e0; e->e_nxt; e = e->e_nxt); + else + e0 = NULL; + } + while (cp = NULL, sz = 0, + (len = ib_getlin(ip, &cp, &sz, srealloc)) > 0) { + if ((nl = cp[len - 1] == '\n') != 0) + cp[len - 1] = '\0'; + addpat(&e, cp, len - nl, nl); + } + ib_close(ip); +} + +/* + * getc() substitute operating on the pattern list. + */ +int +nextch(void) +{ + static struct expr *e; + static char *cp; + static long len; + static int oneof; + wchar_t wc; + int n; + + if (oneof) + return EOF; + if (e == NULL) { + e = e0; + if (e->e_flg & E_NULL) { + oneof++; + return EOF; + } + } + if (cp == NULL) { + cp = e->e_pat; + len = e->e_len; + } + if (mbcode && *cp & 0200) { + if ((n = mbtowc(&wc, cp, MB_LEN_MAX)) < 0) { + fprintf(stderr, "%s: illegal byte sequence\n", + progname); + exit(1); + } + cp += n; + len -= n; + } else { + wc = *cp++ & 0377; + len--; + } + if (len >= 0) + return iflag ? mbcode && wc & ~(wchar_t)0177 ? + towlower(wc) : tolower(wc) : wc; + cp = NULL; + n = e->e_flg & E_NL; + if ((e = e->e_nxt) == NULL) { + oneof++; + if (!n) + return EOF; + } + return '\n'; +} + +/* + * Print matching line based on ip->ib_cur and moff. Advance ip->ib_cur to start + * of next line. Used from special rangematch functions. + */ +void +outline(struct iblok *ip, char *last, size_t moff) +{ + register char *sol, *eol; /* start and end of line */ + + if (qflag == 0) { + if (status == 1) + status = 0; + if (lflag) { + puts(filename ? 
filename : stdinmsg); + } else { + lmatch++; + sol = ip->ib_cur + moff; + if (*sol == '\n' && sol > ip->ib_cur) + sol--; + while (sol > ip->ib_cur && *sol != '\n') + sol--; + if (sol > ip->ib_cur) + sol++; + ip->ib_cur += moff; + for (eol = ip->ib_cur; eol <= last + && *eol != '\n'; eol++); + if (!cflag) + report(sol, eol - sol, ib_offs(ip) / BSZ, 1); + ip->ib_cur = eol + 1; + } + } else /* qflag != 0 */ + exit(0); +} diff --git a/grep/rcomp.c b/grep/rcomp.c @@ -0,0 +1,350 @@ +/* + * grep - search a file for a pattern + * + * Gunnar Ritter, Freiburg i. Br., Germany, April 2001. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* Sccsid @(#)rcomp.c 1.27 (gritter) 2/6/05> */ + +/* + * Code involving POSIX.2 regcomp()/regexpr() routines. + */ + +#include "grep.h" +#include "alloc.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <mbtowi.h> + +static int emptypat; + +#ifdef UXRE +#include <regdfa.h> +static int rc_range(struct iblok *, char *); +static int rc_rangew(struct iblok *, char *); +#endif + +/* + * Check whether line matches any pattern of the pattern list. 
+ */ +static int +rc_match(const char *str, size_t sz) +{ +#ifndef UXRE + struct expr *e; +#endif + regmatch_t pmatch[1]; + int gotcha = 0; + + if (emptypat) { + if (xflag) { + if (*str == '\0') + return 1; + } else + return 1; + } +#ifdef UXRE + if (e0->e_exp) + gotcha = (regexec(e0->e_exp, str, 1, pmatch, 0) == 0); +#else /* !UXRE */ + for (e = e0; e; e = e->e_nxt) { + if (e->e_exp) { + gotcha = (regexec(e->e_exp, str, 1, pmatch, 0) == 0); + if (gotcha) + break; + } + } +#endif /* !UXRE */ + if (gotcha) + if (!xflag || (pmatch[0].rm_so == 0 + && pmatch[0].rm_eo == sz)) + return 1; + return 0; +} + +/* + * Compile a pattern structure using regcomp(). + */ +static void +rc_build(void) +{ + int rerror = REG_BADPAT; + int rflags = 0; + size_t sz; +#ifdef UXRE + char *pat, *cp; +#endif /* UXRE */ + struct expr *e; + + if ((e0->e_flg & E_NULL) == 0) { + for (sz = 0, e = e0; e; e = e->e_nxt) { + if (e->e_len > 0) + sz += e->e_len + 1; + else + emptypat = 1; + } + } else + sz = 1; + if ((e0->e_flg & E_NULL || emptypat) && sus == 0) + rc_error(e0, rerror); + if (sz == 0 || (emptypat && xflag == 0)) { + e0->e_exp = NULL; + return; + } +#ifdef UXRE + pat = smalloc(sz); + for (cp = pat, e = e0; e; e = e->e_nxt) { + if (e->e_len > 0) { + memcpy(cp, e->e_pat, e->e_len); + cp[e->e_len] = '\n'; + cp = &cp[e->e_len + 1]; + } + } + pat[sz - 1] = '\0'; + if (iflag) + rflags |= REG_ICASE; + if (Eflag) + rflags |= (sus ? REG_EXTENDED : REG_OLDERE|REG_NOI18N) | + REG_MTPARENBAD; + else { + rflags |= REG_ANGLES; + if (sus >= 3) + rflags |= REG_AVOIDNULL; + } + if (xflag) + rflags |= REG_ONESUB; + else + rflags |= REG_NOSUB; + if ((e = e0)->e_nxt) + rflags |= REG_NLALT; + e->e_exp = (regex_t *)smalloc(sizeof *e->e_exp); + if ((rerror = regcomp(e->e_exp, pat, rflags)) != 0) + rc_error(e, rerror); + free(pat); + if (!xflag && e->e_exp->re_flags & REG_DFA) + range = mbcode ? 
rc_rangew : rc_range; +#else /* !UXRE */ + if (iflag) + rflags |= REG_ICASE; + if (Eflag) + rflags |= REG_EXTENDED; + if (!xflag) + rflags |= REG_NOSUB; + for (e = e0; e; e = e->e_nxt) { + e->e_exp = (regex_t *)smalloc(sizeof *e->e_exp); + if ((rerror = regcomp(e->e_exp, e->e_pat, rflags)) != 0) + rc_error(e, rerror); + } +#endif /* !UXRE */ +} + +void +rc_select(void) +{ + build = rc_build; + match = rc_match; + matchflags |= MF_NULTERM; + matchflags &= ~MF_LOCONV; +} + +/* + * Derived from Unix 32V /usr/src/cmd/egrep.y + * + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, April 2001. + */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE
 * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#ifdef	UXRE
/*
 * Range search for singlebyte locales using the modified UNIX(R) Regular
 * Expression Library DFA.
 *
 * Scans the buffer from ip->ib_cur, driving the DFA of the expression at
 * e0 one byte at a time and bumping lineno for every line started.
 * Selected lines are handed to outline(). Returns 1 right after a line
 * was output while qflag or lflag is set, and 0 once the scan position
 * has moved past last.
 */
static int
rc_range(struct iblok *ip, char *last)
{
	char	*p;
	int	c, cstat, nstat;
	Dfa	*dp = e0->e_exp->re_dfa;

	p = ip->ib_cur;
	lineno++;
	cstat = dp->anybol;
	/* The start-of-line state may already be accepting. */
	if (dp->acc[cstat])
		goto found;
	for (;;) {
		/*
		 * A zero table entry means regtrans() has to supply the
		 * transition; table values are state numbers plus one
		 * (see the "nstat - 1" below).
		 */
		if ((nstat = dp->trans[cstat][*p & 0377]) == 0) {
			/*
			 * '\0' is used to indicate end-of-line. If a '\0'
			 * character appears in input, it matches '$' but
			 * the DFA remains in dead state afterwards; there
			 * is thus no need to handle this condition
			 * specially to get the same behavior as in plain
			 * regexec().
			 */
			if ((c = *p & 0377) == '\n')
				c = '\0';
			if ((nstat = regtrans(dp, cstat, c, 1)) == 0)
				goto fail;
			dp->trans[cstat]['\n'] = dp->trans[cstat]['\0'];
		}
		if (dp->acc[cstat = nstat - 1]) {
			/*
			 * found:   the current line matches.
			 * succeed: the line is selected; output it.
			 * fail:    the line is not selected; skip past its
			 *          newline.
			 * vflag inverts which of the two a match leads to.
			 */
		found:	for (;;) {
				if (vflag == 0) {
		succeed:		outline(ip, last, p - ip->ib_cur);
					if (qflag || lflag)
						return 1;
				} else {
		fail:			ip->ib_cur = p;
					while (*ip->ib_cur++ != '\n');
				}
				if ((p = ip->ib_cur) > last)
					return 0;
				lineno++;
				/* Restart the DFA for the next line. */
				if (dp->acc[cstat = dp->anybol] == 0)
					goto brk2;
			}
		}
		if (*p++ == '\n') {
			/* End of line reached without accepting. */
			if (vflag) {
				p--;
				goto succeed;
			}
			if ((ip->ib_cur = p) > last)
				return 0;
			lineno++;
			if (dp->acc[cstat = dp->anybol])
				goto found;
		}
	brk2:;
	}
}

/*
 * Range search for multibyte locales using the modified UNIX(R) Regular
 * Expression Library DFA.
 *
 * Works like rc_range() but decodes one character per step: bytes with
 * the high bit set go through mbtowi(), and an undecodable byte is
 * consumed on its own with the character value WEOF.
 */
static int
rc_rangew(struct iblok *ip, char *last)
{
	char	*p;
	int	n, cstat, nstat;
	wint_t	wc;
	Dfa	*dp = e0->e_exp->re_dfa;

	p = ip->ib_cur;
	lineno++;
	cstat = dp->anybol;
	/* The start-of-line state may already be accepting. */
	if (dp->acc[cstat])
		goto found;
	for (;;) {
		/* n is the number of bytes the current character occupies. */
		if (*p & 0200) {
			if ((n = mbtowi(&wc, p, last + 1 - p)) < 0) {
				n = 1;
				wc = WEOF;
			}
		} else {
			wc = *p;
			n = 1;
		}
		/*
		 * Characters outside the table range (NCHAR) and zero
		 * table entries are both resolved through regtrans().
		 */
		if ((wc & ~(wchar_t)(NCHAR-1)) != 0 ||
				(nstat = dp->trans[cstat][wc]) == 0) {
			/*
			 * '\0' is used to indicate end-of-line. If a '\0'
			 * character appears in input, it matches '$' but
			 * the DFA remains in dead state afterwards; there
			 * is thus no need to handle this condition
			 * specially to get the same behavior as in plain
			 * regexec().
			 */
			if (wc == '\n')
				wc = '\0';
			if ((nstat = regtrans(dp, cstat, wc, mb_cur_max)) == 0)
				goto fail;
			dp->trans[cstat]['\n'] = dp->trans[cstat]['\0'];
		}
		if (dp->acc[cstat = nstat - 1]) {
			/*
			 * found:   the current line matches.
			 * succeed: the line is selected; output it.
			 * fail:    the line is not selected; skip past its
			 *          newline.
			 * vflag inverts which of the two a match leads to.
			 */
		found:	for (;;) {
				if (vflag == 0) {
		succeed:		outline(ip, last, p - ip->ib_cur);
					if (qflag || lflag)
						return 1;
				} else {
		fail:			ip->ib_cur = p;
					while (*ip->ib_cur++ != '\n');
				}
				if ((p = ip->ib_cur) > last)
					return 0;
				lineno++;
				/* Restart the DFA for the next line. */
				if (dp->acc[cstat = dp->anybol] == 0)
					goto brk2;
			}
		}
		p += n;
		if (p[-n] == '\n') {
			/* End of line reached without accepting. */
			if (vflag) {
				p--;
				goto succeed;
			}
			if ((ip->ib_cur = p) > last)
				return 0;
			lineno++;
			if (dp->acc[cstat = dp->anybol])
				goto found;
		}
	brk2:;
	}
}
#endif	/* UXRE */
diff --git a/grep/sus.c b/grep/sus.c @@ -0,0 +1,133 @@
/*
 * grep - search a file for a pattern
 *
 * Gunnar Ritter, Freiburg i. Br., Germany, April 2001.
 */
/*
 * Copyright (c) 2003 Gunnar Ritter
 *
 * This software is provided 'as-is', without any express or implied
 * warranty. In no event will the authors be held liable for any damages
 * arising from the use of this software.
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* Sccsid @(#)sus.c 1.24 (gritter) 5/29/05> */ + +/* + * Code for POSIX.2 command version only. + */ + +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "alloc.h" +#include "grep.h" + +#if defined (SU3) +int sus = 3; +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char su3id[] USED = "@(#)grep_su3.sl 1.24 (gritter) 5/29/05"; +#else +int sus = 1; +#endif +char *stdinmsg = "(standard input)"; + +/* + * Usage message. + */ +void +usage(void) +{ + char *sEF, *sq, *ss; + + if (*progname == 'f') { + sq = ""; + ss = ""; + } else { + sq = "|-q"; + ss = "s"; + } + if (*progname == 'f' || *progname == 'e') + sEF = ""; + else + sEF = "[-E|-F] "; + fprintf(stderr, "%s: Usage:\n\ + %s[-c|-l%s] [-bhin%svx] pattern [file ...]\n\ + %s[-c|-l%s] [-bhin%svx] -e pattern ... [-f file ...] [file ...]\n\ + %s[-c|-l%s] [-bhin%svx] -f file ... [-e pattern ...] 
[file ...]\n", + progname, + sEF, sq, ss, + sEF, sq, ss, + sEF, sq, ss); + exit(2); +} + +void +misop(void) +{ + usage(); +} + +void +rc_error(struct expr *e, int rerror) +{ + char *regerrs; + size_t resz; + + resz = regerror(rerror, e->e_exp, NULL, 0) + 1; + regerrs = smalloc(resz); + regerror(rerror, e->e_exp, regerrs, resz); + fprintf(stderr, "%s: RE error: %s\n", progname, regerrs); + exit(2); +} + +void +init(void) +{ + switch (*progname) { + case 'e': + Eflag = 2; + rc_select(); + options = "EFbce:f:hilnqrRsvxyz"; + break; + case 'f': + Fflag = 2; + ac_select(); + options = "Fbce:f:hilnqrRsvxyz"; + break; + default: + rc_select(); + options = "EFbce:f:hilnqrRsvwxyz"; + } +} + +void +eg_select(void) +{ +} + +void +st_select(void) +{ +} diff --git a/hd/hd.1 b/hd/hd.1 @@ -0,0 +1,160 @@ +.\" +.\" Sccsid @(#)hd.1 1.8 (gritter) 12/5/04 +.TH HD 1XNX "12/5/04" "Heirloom Toolchest" "XENIX System Compatibility" +.SH NAME +hd \- (XENIX) display files in hexadecimal format +.SH SYNOPSIS +\fBhd\fR +[\fB\-acbwlAxdov\fR] +[\fB\-t\fR] +[\fB\-s\fI\ offset\fR] +[\fB\-n\fI\ count\fR] +[\fIfiles\fR] +.SH DESCRIPTION +The +.I hd +command +displays the contents +of the given +.I files +(or of standard input +if no such argument is present) +in hexadecimal, decimal or octal format, +16 bytes per line. +It optionally displays printable ASCII characters in the rightmost column. +The file address is printed in the leftmost column. +.PP +Output format selection is done +with options; each format selection option consists +(optionally) of a format selector and a base selector. +.PP +The format selectors are as follows; +if no format selector is present, +the base selector is applied to all available format selectors: +.TP +.B \-b +Separates the input in bytes; +each byte is printed as a numerical value. 
+.TP +.B \-c +Separates the input in bytes; +each byte that is also a printable character +is printed as such, +certain characters are printed as C language escapes, +others as a numerical value. +.TP +.B \-w +Separates the input in 16-bit words; +each word is printed as a numerical value. +.TP +.B \-l +Separates the input in 32-bit long words; +each word is printed as a numerical value. +.PP +The following base selectors are accepted; +if the base selector is omitted, +all base selectors are applied to the chosen format selectors: +.TP +.B \-o +Selects octal output. +.TP +.B \-d +Selects unsigned decimal output. +.TP +.B \-x +Selects hexadecimal output. +.PP +The format of the addresses at the left +can also be selected by using the +.TP +.B \-a +option followed by one of the base selectors; +only one base selector is applied. +The default address base is hexadecimal. +.PP +The following options also affect the output format: +.TP +.B \-A +Causes bytes that are printable characters to be displayed +as such at the right; nonprintable bytes are printed as `.'. +.TP +.B \-t +Text file format; overrides all other format options except +.IR \-a . +Each line of the input is preceded by its address; +printable characters are displayed as such, +except for `\e', `^', and `~', which are prefixed by a `\e' character. +ASCII control characters are prefixed by `^'. +Bytes with the highest bit set that do not form a printable character +are indicated by `~', +followed by the corresponding ASCII character as above. +.TP +.B \-v +Unless +.I \-t +is used, +a set of immediately following identical 16-byte sets +is abbreviated by printing `*' for the second and further ones. +This option inhibits this behavior +and causes all lines to be displayed. +This option is an extension. +.PP +If no output selection is present, the default is +.IR \-bxA . +.PP +The following options affect the handling of input files: +.TP +\fB\-s\fI\ offset\fR +Selects a starting offset within each file. 
+.I Offset +is interpreted as a decimal value +unless prefixed by +.BR 0 , +which causes interpretation as an octal value, +or +.BR 0x , +which causes interpretation as a hexadecimal value. +The suffixes +.BR w , +.BR l , +.BR b , +and +.B k +cause multiplication by +2 (`words'), +4 (`long words'), +512 (`blocks'), +or +1024 (`kilobytes'), +respectively. +Value and suffix may be separated by a `*' character +to suppress interpretation of `b' as a hexadecimal digit. +.TP +\fB\-n\fI\ count\fR +Causes only +.I count +characters of the file to be displayed; +.I count +is handled as the +.I offset +for +.I \-s +described above. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_CTYPE +Selects the set of printable single-byte characters +for the +.I \-A +and +.I \-c +options +and the set of printable characters for the +.I \-t +option. +.SH "SEE ALSO" +od(1) diff --git a/hd/hd.c b/hd/hd.c @@ -0,0 +1,715 @@ +/* + * hd - display files in hexadecimal format + * + * Gunnar Ritter, Freiburg i. Br., Germany, September 2003. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)hd.sl 1.12 (gritter) 5/29/05"; + +#include <sys/types.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <libgen.h> +#include <stdarg.h> +#include <locale.h> +#include <ctype.h> +#include <wctype.h> +#include <wchar.h> +#include <inttypes.h> +#include <limits.h> +#include "atoll.h" +#include "mbtowi.h" + +#ifdef __GLIBC__ +#ifdef _IO_getc_unlocked +#undef getc +#define getc(f) _IO_getc_unlocked(f) +#endif +#ifdef _IO_putc_unlocked +#undef putchar +#define putchar(c) _IO_putc_unlocked(c, stdout) +#endif +#endif /* __GLIBC__ */ + +enum base { + BASE_0 = 00, + BASE_X = 01, + BASE_D = 02, + BASE_O = 04 +}; + +union block { + int8_t b_c[16]; + int16_t b_w[8]; + int32_t b_l[4]; +}; + +static const struct fm { + int f_fmt; + enum base f_base; + char f_width; + char f_align[3]; + const char *f_prf; +} ofmt[] = { + { 'b', BASE_X, 2, {2,5,11}, "%02x" }, + { 'b', BASE_D, 3, {3,7,15}, "%3u" }, + { 'b', BASE_O, 3, {3,7,15}, "%03o" }, + { 'c', BASE_X, 2, {2,5,11}, "%02x" }, + { 'c', BASE_D, 3, {3,7,15}, "%3u" }, + { 'c', BASE_O, 3, {3,7,15}, "%03o" }, + { 'w', BASE_X, 4, {0,4, 9}, "%04x" }, + { 'w', BASE_D, 5, {0,5,11}, "%5u" }, + { 'w', BASE_O, 6, {0,6,13}, "%06o" }, + { 'l', BASE_X, 8, {0,0, 8}, "%08lx" }, + { 'l', BASE_D, 10, {0,0,10}, "%10lu" }, + { 'l', BASE_O, 11, {0,0,11}, "%011lo" }, + { 0, BASE_0, 0, {0,0, 0}, NULL } +}; + +static int Aflag; /* print ASCII at right */ +static enum base aflag; /* address format specifier */ +static enum base bflag; /* byte format specifier */ +static enum base cflag; /* print ASCII at center */ +static enum base lflag; /* long (32 bit) format specifier */ +static long long nflag; /* number of bytes to process */ +static long long sflag; /* start offset */ +static 
int tflag; /* print text file */ +static int vflag; /* no '*' for identical lines */ +static enum base wflag; /* word (16 bit) format specifier */ +static char align[3]; +static const char *progname; +static int status; +static int mb_cur_max; + +static void usage(void); +static void flag(int); +static void base(enum base, enum base *); +static long long count(const char *); +static void usage(void); +static void diag(const char *, ...); +static void hd(FILE *); +static void prna(long long); +static void prnb(union block *, int); +static void line(union block *, int, int, enum base, int); +static const struct fm *getfmt(int, enum base); +static void getalign(void); +static void prnt(FILE *, long long); +static void prnc(int); +static char *wcget(FILE *fp, wint_t *wc, int *len); + +int +main(int argc, char **argv) +{ + FILE *fp; + int i, j; + + progname = basename(argv[0]); + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + for (i = 1; i < argc && argv[i][0] == '-'; i++) { + switch (argv[i][1]) { + case 's': + if (argv[i][2]) + sflag = count(&argv[i][2]); + else if (++i < argc) + sflag = count(argv[i]); + else + usage(); + break; + case 'n': + if (argv[i][2]) + nflag = count(&argv[i][2]); + else if (++i < argc) + nflag = count(argv[i]); + else + usage(); + break; + default: + for (j = 1; argv[i][j]; j++) + flag(argv[i][j]&0377); + flag(0); + } + } + if (tflag && (Aflag|bflag|cflag|lflag|wflag)) + diag("-t flag overrides other flags"); + if ((Aflag|bflag|cflag|lflag|wflag) == 0) + Aflag = 1; + if ((bflag|cflag|lflag|wflag) == 0) + bflag = BASE_X; + getalign(); + if (i < argc) { + j = i+1 < argc; + do { + if (access(argv[i], R_OK) < 0) { + diag("cannot access %s", argv[i]); + continue; + } + if ((fp = fopen(argv[i], "r")) == NULL) { + diag("open of %s failed", argv[i]); + continue; + } + if (j) + printf("%s:\n", argv[i]); + hd(fp); + fclose(fp); + if (i+1 < argc) + printf("\n"); + } while (++i < argc); + } else + hd(stdin); + return status; +} + +static void 
+flag(int c) +{ + static enum base *basep; + + switch (c) { + case '\0': + if (basep && basep != &aflag && *basep == BASE_0) + *basep |= BASE_O|BASE_D|BASE_X; + basep = NULL; + break; + case 'a': + basep = &aflag; + break; + case 'b': + basep = &bflag; + break; + case 'w': + basep = &wflag; + break; + case 'l': + basep = &lflag; + break; + case 'c': + basep = &cflag; + break; + case 'A': + Aflag = 1; + break; + case 'x': + base(BASE_X, basep); + break; + case 'd': + base(BASE_D, basep); + break; + case 'o': + base(BASE_O, basep); + break; + case 't': + tflag = 1; + break; + case 'v': + vflag = 1; + break; + default: + usage(); + } +} + +static void +base(enum base b, enum base *basep) +{ + if (basep) { + if (basep == &aflag) + *basep = b; + else + *basep |= b; + } else { + if (aflag == BASE_0) + aflag |= b; + cflag |= b; + bflag |= b; + wflag |= b; + lflag |= b; + } +} + +static long long +count(const char *s) +{ + long long c; + int bs = 10; + char *x; + + if (s[0] == '0' && s[1] == 'x') { + bs = 16; + s += 2; + } else if (s[0] == '0') { + bs = 8; + s++; + } + c = strtoll(s, &x, bs); + s = x; + if (*s == '*') + s++; + switch (*s) { + case 'w': + c *= 2; + s++; + break; + case 'l': + c *= 4; + s++; + break; + case 'b': + c *= 512; + s++; + break; + case 'k': + c *= 1024; + s++; + break; + } + if (*s) { + diag("bad count/offset value"); + exit(3); + } + return c; +} + +static void +usage(void) +{ + fprintf(stderr, "usage: %s [-acbwlAxdo] [-t] [-s offset[*][wlbk]] " + "[-n count[*][wlbk]] [file] ...\n", + progname); + exit(2); +} + +static void +diag(const char *fmt, ...) 
+{ + va_list ap; + + fprintf(stderr, "%s: ", progname); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fprintf(stderr, "\n"); + status |= 1; +} + +static void +hd(FILE *fp) +{ + long long of = 0, rd = 0; + union block b, ob; + size_t n, m, on = 0; + int star = 0; + + if (sflag) + while (of < sflag) { + getc(fp); + of++; + } + if (tflag) { + prnt(fp, of); + return; + } + do { + if (nflag == 0 || rd + sizeof b.b_c < nflag) + m = sizeof b.b_c; + else + m = nflag - rd; + if ((n = fread(b.b_c, 1, m, fp)) > 0) { + if (!vflag && n==on && memcmp(b.b_c, ob.b_c, n) == 0) { + if (star == 0) + printf("*\n"); + star = 1; + } else { + star = 0; + prna(of); + if (n < sizeof b.b_c) + memset(&b.b_c[n], 0, sizeof b.b_c - n); + prnb(&b, n); + } + } + rd += n; + of += n; + on = n; + ob = b; + } while (n == m && (nflag == 0 || rd < nflag)); + prna(of); + putchar('\n'); +} + +static void +prna(long long n) +{ + switch (aflag) { + case BASE_O: + printf("%06llo", n); + break; + case BASE_D: + printf("%05llu", n); + break; + case BASE_0: + case BASE_X: + printf("%04llx", n); + break; + } +} + +static void +prnb(union block *bp, int n) +{ + int cnt = 0; + + if (cflag&BASE_X) + line(bp, n, 'c', BASE_X, cnt++); + if (cflag&BASE_D) + line(bp, n, 'c', BASE_D, cnt++); + if (cflag&BASE_O) + line(bp, n, 'c', BASE_O, cnt++); + if (bflag&BASE_X) + line(bp, n, 'b', BASE_X, cnt++); + if (bflag&BASE_D) + line(bp, n, 'b', BASE_D, cnt++); + if (bflag&BASE_O) + line(bp, n, 'b', BASE_O, cnt++); + if (wflag&BASE_X) + line(bp, n, 'w', BASE_X, cnt++); + if (wflag&BASE_D) + line(bp, n, 'w', BASE_D, cnt++); + if (wflag&BASE_O) + line(bp, n, 'w', BASE_O, cnt++); + if (lflag&BASE_X) + line(bp, n, 'l', BASE_X, cnt++); + if (lflag&BASE_D) + line(bp, n, 'l', BASE_D, cnt++); + if (lflag&BASE_O) + line(bp, n, 'l', BASE_O, cnt++); +} + +static void +line(union block *bp, int n, int fmt, enum base base, int cnt) +{ + int c, i, j, k, col = 0; + const char *cp; + const struct fm *fmp; + + putchar('\t'); 
+ i = 0; + switch (fmt) { + case 'l': + fmp = getfmt('l', base); + for (j = i/4; j < (n>>2); j++, i += 4) { + if (col > 0) { + putchar(' '); + col++; + } + if (i == 8) { + putchar(' '); + col++; + } + for (k = fmp->f_width; k < align[2]; k++) { + putchar(' '); + col++; + } + col += printf(fmp->f_prf,(long)(bp->b_l[j]&0xffffffff)); + } + if (i == n) + break; + /*FALLTHRU*/ + case 'w': + fmp = getfmt('w', base); + for (j = i/2; j < (n>>1); j++, i += 2) { + if (col > 0) { + putchar(' '); + col++; + } + if (i == 8) { + putchar(' '); + col++; + } + for (k = fmp->f_width; k < align[1]; k++) { + putchar(' '); + col++; + } + col += printf(fmp->f_prf, (int)(bp->b_w[j]&0177777)); + } + if (i == n) + break; + /*FALLTHRU*/ + case 'b': + fmp = getfmt('b', base); + for (j = i; j < n; j++, i++) { + if (col > 0) { + putchar(' '); + col++; + } + if (i == 8) { + putchar(' '); + col++; + } + for (k = fmp->f_width; k < align[0]; k++) { + putchar(' '); + col++; + } + col += printf(fmp->f_prf, bp->b_c[j]&0377); + } + break; + case 'c': + fmp = getfmt('c', base); + for (i = 0; i < n; i++) { + if (col > 0) { + putchar(' '); + col++; + } + if (i == 8) { + putchar(' '); + col++; + } + for (k = fmp->f_width; k < align[0]; k++) { + putchar(' '); + col++; + } + c = bp->b_c[i]&0377; + cp = NULL; + if (c == '\b') + cp = "\\b"; + else if (c == '\t') + cp = "\\t"; + else if (c == '\n') + cp = "\\n"; + else if (c == '\f') + cp = "\\f"; + else if (c == '\r') + cp = "\\r"; + else if (!isprint(c)) { + col += printf(fmp->f_prf, c); + } else { + if (base != BASE_X) { + putchar(' '); + col++; + } + col += printf(" %c", c); + } + if (cp) { + if (base != BASE_X) { + putchar(' '); + col++; + } + printf(cp); + } + } + break; + } + if (cnt == 0 && Aflag) { + while (col++ < 51) + putchar(' '); + for (i = 0; i < n; i++) { + if ((bp->b_c[i]&0340) == 0 || bp->b_c[i] == 0177 || + !isprint(bp->b_c[i]&0377)) + putchar('.'); + else + putchar(bp->b_c[i]&0377); + } + } + putchar('\n'); +} + +static const struct fm * 
+getfmt(int fmt, enum base base) +{ + int i; + + for (i = 0; ofmt[i].f_fmt; i++) + if (ofmt[i].f_fmt == fmt && ofmt[i].f_base == base) + return &ofmt[i]; + return NULL; +} + +static void +getalign(void) +{ + int i, j; + enum base *basep; + + for (i = 0; ofmt[i].f_fmt; i++) { + switch (ofmt[i].f_fmt) { + case 'b': + basep = &bflag; + break; + case 'c': + basep = &cflag; + break; + case 'w': + basep = &wflag; + break; + case 'l': + basep = &lflag; + break; + default: + basep = NULL; + } + if (basep && *basep & ofmt[i].f_base) + for (j = 0; j < sizeof align; j++) + if (ofmt[i].f_align[j] > align[j]) + align[j] = ofmt[i].f_align[j]; + } +} + +static void +prnt(FILE *fp, long long of) +{ + wint_t wc; + char b, *mb; + int c, lastc = '\n', n; + long long rd = 0; + + while ((nflag == 0 || rd < nflag)) { + if (mb_cur_max > 1) { + if ((mb = wcget(fp, &wc, &n)) == NULL) + break; + } else { + if ((c = getc(fp)) == EOF) + break; + b = wc = c; + mb = &b; + n = 1; + } + if (lastc == '\n') { + prna(of); + putchar('\t'); + } + of += n, rd += n; + if (n == 1) { + c = *mb&0377; + lastc = c; + if (wc != WEOF && isprint(c) && c != '\\' && + c != '^' && c != '~') + putchar(c); + else + prnc(c); + if (lastc == '\n') + putchar('\n'); + } else { + lastc = c = EOF; + if (wc != WEOF && iswprint(wc)) + while (n--) { + putchar(*mb&0377); + mb++; + } + else + while (n--) { + prnc(*mb&0377); + mb++; + } + } + } + if (lastc != '\n') + putchar('\n'); + prna(of); + putchar('\n'); +} + +static void +prnc(int c) +{ + if (c == 0177 || c == 0377) { + printf("\\%o", c); + return; + } + if (c & 0200) { + putchar('~'); + c &= 0177; + } + if (c < 040) { + putchar('^'); + c |= 0100; + } + if (c == '\\' || c == '~' || c == '^') + putchar('\\'); + putchar(c); +} + +static char * +wcget(FILE *fp, wint_t *wc, int *len) +{ + static char mbuf[MB_LEN_MAX+1]; + static char *mcur, *mend; + static int incompl; + size_t rest; + int c, i, n; + + i = 0; + rest = mend - mcur; + if (rest && mcur > mbuf) { + do + mbuf[i] = 
mcur[i]; + while (i++, --rest); + } else if (incompl) { + incompl = 0; + *wc = WEOF; + mend = mcur = NULL; + return NULL; + } + if (i == 0) { + c = getc(fp); + if (c == EOF) { + *wc = WEOF; + mend = mcur = NULL; + return NULL; + } + mbuf[i++] = c; + } + if (mbuf[0] & 0200) { + while (mbuf[i-1] != '\n' && i < mb_cur_max && + incompl == 0) { + c = getc(fp); + if (c != EOF) + mbuf[i++] = c; + else + incompl = 1; + } + n = mbtowi(wc, mbuf, i); + if (n < 0) { + *len = 1; + *wc = WEOF; + } else if (n == 0) { + *len = 1; + *wc = '\0'; + } else + *len = n; + } else { + *wc = mbuf[0]; + *len = n = 1; + } + mcur = &mbuf[*len]; + mend = &mcur[i - *len]; + return mbuf; +} diff --git a/hd/mkfile b/hd/mkfile @@ -0,0 +1,7 @@ +BIN = hd +OBJ = hd.o +INSTALL_BIN = hd +INSTALL_MAN1 = hd.1 +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/lex/allprint.c b/lex/allprint.c @@ -0,0 +1,94 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* Copyright (c) 1989 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "allprint.c 6.11 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)allprint.c 1.4 (gritter) 11/27/05 + */ + +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#ifdef __sun +#include <sys/euc.h> +#include <widec.h> +#endif +#include <wctype.h> +#include <wchar.h> + +extern FILE *yyout; + +#ifndef JLSLEX +#define CHR char +#endif + +#ifdef WOPTION +#define CHR wchar_t +#define sprint sprint_w +#define allprint allprint_w +#endif + +#ifdef EOPTION +#define CHR wchar_t +#endif + +void +allprint(CHR c) +{ + switch (c) { + case '\n': + fprintf(yyout, "\\n"); + break; + case '\t': + fprintf(yyout, "\\t"); + break; + case '\b': + fprintf(yyout, "\\b"); + break; + case ' ': + fprintf(yyout, "\\_"); + break; + default: + if (!iswprint(c)) + fprintf(yyout, "\\x%-2x", (int)c); + else + putwc(c, yyout); + break; + } +} + +void +sprint(CHR *s) +{ + while (*s) + allprint(*s++); +} diff --git a/lex/depsinc.mk b/lex/depsinc.mk @@ -0,0 +1 @@ +LEX = $lex_DEPDIR/lex -Y$lex_DEPDIR diff --git a/lex/getopt.c b/lex/getopt.c @@ -0,0 +1,222 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)getopt.c 1.10 (gritter) 12/16/07 + */ +/* from OpenSolaris "getopt.c 1.23 05/06/08 SMI" */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +/* + * See getopt(3C) and SUS/XPG getopt() for function definition and + * requirements. + * + * This actual implementation is a bit looser than the specification + * as it allows any character other than ':' to be used as an option + * character - The specification only guarantees the alnum characters + * ([a-z][A-Z][0-9]). + */ + +#include <sys/types.h> +#include <string.h> +#include <stdio.h> + +extern ssize_t write(int, const void *, size_t); + +char *optarg = NULL; +int optind = 1; +int opterr = 1; +int optopt = 0; + +#define ERR(s, c) err(s, c, optstring, argv[0]) +static void +err(const char *s, int c, const char *optstring, const char *argv0) +{ + char errbuf[256], *ep = errbuf; + const char *cp; + + if (opterr && optstring[0] != ':') { + for (cp = argv0; *cp && ep<&errbuf[sizeof errbuf]; cp++, ep++) + *ep = *cp; + for (cp = ": "; *cp && ep<&errbuf[sizeof errbuf]; cp++, ep++) + *ep = *cp; + for (cp = s; *cp && ep<&errbuf[sizeof errbuf]; cp++, ep++) + *ep = *cp; + for (cp = " -- "; *cp && ep<&errbuf[sizeof errbuf]; cp++, ep++) + *ep = *cp; + if (ep<&errbuf[sizeof errbuf]) + *ep++ = c; + if (ep<&errbuf[sizeof errbuf]) + *ep++ = '\n'; + write(2, errbuf, ep - errbuf); + } +} + +/* + * getopt_sp is required to keep state between successive calls to getopt() + * while extracting aggregated options (ie: -abcd). 
Hence, getopt() is not + * thread safe or reentrant, but it really doesn't matter. + * + * So, why isn't this "static" you ask? Because the historical Bourne + * shell has actually latched on to this little piece of private data. + */ +int getopt_sp = 1; + +/* + * Determine if the specified character (c) is present in the string + * (optstring) as a regular, single character option. If the option is found, + * return a pointer into optstring pointing at the option character, + * otherwise return null. The character ':' is not allowed. + */ +static char * +parse(const char *optstring, const char c) +{ + char *cp = (char *)optstring; + + if (c == ':') + return (NULL); + do { + if (*cp == c) + return (cp); + } while (*cp++ != '\0'); + return (NULL); +} + +/* + * External function entry point. + */ +int +getopt(int argc, char *const *argv, const char *optstring) +{ + char c; + char *cp; + + /* + * Has the end of the options been encountered? The following + * implements the SUS requirements: + * + * If, when getopt() is called: + * argv[optind] is a null pointer + * *argv[optind] is not the character '-' + * argv[optind] points to the string "-" + * getopt() returns -1 without changing optind. If + * argv[optind] points to the string "--" + * getopt() returns -1 after incrementing optind. + */ + if (getopt_sp == 1) { + if (optind >= argc || argv[optind][0] != '-' || + argv[optind] == NULL || argv[optind][1] == '\0') + return (EOF); + else if (strcmp(argv[optind], "--") == 0) { + optind++; + return (EOF); + } + } + + /* + * Getting this far indicates that an option has been encountered. + * Note that the syntax of optstring applies special meanings to + * the characters ':' and '(', so they are not permissible as + * option letters. A special meaning is also applied to the ')' + * character, but its meaning can be determined from context. + * Note that the specification only requires that the alnum + * characters be accepted. 
+ */ + optopt = c = (unsigned char)argv[optind][getopt_sp]; + optarg = NULL; + if ((cp = parse(optstring, c)) == NULL) { + /* LINTED: variable format specifier */ + ERR("illegal option", c); + if (argv[optind][++getopt_sp] == '\0') { + optind++; + getopt_sp = 1; + } + return ('?'); + } + optopt = c = *cp; + + /* + * A valid option has been identified. If it should have an + * option-argument, process that now. SUS defines the setting + * of optarg as follows: + * + * 1. If the option was the last character in the string pointed to + * by an element of argv, then optarg contains the next element + * of argv, and optind is incremented by 2. If the resulting + * value of optind is not less than argc, this indicates a + * missing option-argument, and getopt() returns an error + * indication. + * + * 2. Otherwise, optarg points to the string following the option + * character in that element of argv, and optind is incremented + * by 1. + * + * The second clause allows -abcd (where b requires an option-argument) + * to be interpreted as "-a -b cd". + */ + if (*(cp + 1) == ':') { + /* The option takes an argument */ + if (argv[optind][getopt_sp+1] != '\0') { + optarg = &argv[optind++][getopt_sp+1]; + } else if (++optind >= argc) { + /* LINTED: variable format specifier */ + ERR("option requires an argument", c); + getopt_sp = 1; + optarg = NULL; + return (optstring[0] == ':' ? ':' : '?'); + } else + optarg = argv[optind++]; + getopt_sp = 1; + } else { + /* The option does NOT take an argument */ + if (argv[optind][++getopt_sp] == '\0') { + getopt_sp = 1; + optind++; + } + optarg = NULL; + } + return (c); +} /* getopt() */ + +#ifdef __APPLE__ +/* + * Starting with Mac OS 10.5 Leopard, <unistd.h> turns getopt() + * into getopt$UNIX2003() by default. Consequently, this function + * is called instead of the one defined above. However, optind is + * still taken from this file, so in effect, options are not + * properly handled. 
Defining an own getopt$UNIX2003() function + * works around this issue. + */ +int +getopt$UNIX2003(int argc, char *const argv[], const char *optstring) +{ + return getopt(argc, argv, optstring); +} +#endif /* __APPLE__ */ diff --git a/lex/header.c b/lex/header.c @@ -0,0 +1,409 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "header.c 6.22 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)header.c 1.12 (gritter) 9/23/06 + */ + +#include "ldefs.c" + +static void rhd1(void); +static void chd1(void); +static void chd2(void); +static void ctail(void); +static void rtail(void); + +void +phead1(void) +{ + ratfor ? 
rhd1() : chd1(); +} + +static void +chd1(void) +{ + if (*v_stmp == 'y') { + extern const char rel[]; + fprintf(fout, "\ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4\n\ +#define YYUSED __attribute__ ((used))\n\ +#elif defined __GNUC__\n\ +#define YYUSED __attribute__ ((unused))\n\ +#else\n\ +#define YYUSED\n\ +#endif\n\ +static const char yylexid[] USED = \"lex: %s\"\n", rel); + } + if (handleeuc) { + fprintf(fout, "#ifndef EUC\n"); + fprintf(fout, "#define EUC\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "#include <stdio.h>\n"); + fprintf(fout, "#include <stdlib.h>\n"); + fprintf(fout, "#ifdef __sun\n"); + fprintf(fout, "#include <widec.h>\n"); + fprintf(fout, "#else /* !__sun */\n"); + fprintf(fout, "#include <wchar.h>\n"); + fprintf(fout, "#endif /* !__sun */\n"); + if (widecio) { /* -w option */ + fprintf(fout, "#define YYTEXT yytext\n"); + fprintf(fout, "#define YYLENG yyleng\n"); + fprintf(fout, "#ifndef __cplusplus\n"); + fprintf(fout, "#define YYINPUT input\n"); + fprintf(fout, "#define YYOUTPUT output\n"); + fprintf(fout, "#else\n"); + fprintf(fout, "#define YYINPUT lex_input\n"); + fprintf(fout, "#define YYOUTPUT lex_output\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "#define YYUNPUT unput\n"); + } else { /* -e option */ + fprintf(fout, "#include <limits.h>\n"); + fprintf(fout, "#ifdef __sun\n"); + fprintf(fout, "#include <sys/euc.h>\n"); + fprintf(fout, "#endif /* __sun */\n"); + fprintf(fout, "#define YYLEX_E 1\n"); + fprintf(fout, "#define YYTEXT yywtext\n"); + fprintf(fout, "#define YYLENG yywleng\n"); + fprintf(fout, "#define YYINPUT yywinput\n"); + fprintf(fout, "#define YYOUTPUT yywoutput\n"); + fprintf(fout, "#define YYUNPUT yywunput\n"); + } + } else { /* ASCII compatibility mode. 
*/ + fprintf(fout, "#include <stdio.h>\n"); + fprintf(fout, "#include <stdlib.h>\n"); + } + if (ZCH > NCH) + fprintf(fout, "# define U(x) ((x)&0377)\n"); + else + fprintf(fout, "# define U(x) x\n"); + fprintf(fout, "# define NLSTATE yyprevious=YYNEWLINE\n"); + fprintf(fout, "# define BEGIN yybgin = yysvec + 1 +\n"); + fprintf(fout, "# define INITIAL 0\n"); + fprintf(fout, "# define YYLERR yysvec\n"); + fprintf(fout, "# define YYSTATE (yyestate-yysvec-1)\n"); + if (optim) + fprintf(fout, "# define YYOPTIM 1\n"); +#ifdef DEBUG + fprintf(fout, "# define LEXDEBUG 1\n"); +#endif + fprintf(fout, "# ifndef YYLMAX \n"); + fprintf(fout, "# define YYLMAX BUFSIZ\n"); + fprintf(fout, "# endif \n"); + fprintf(fout, "#ifndef __cplusplus\n"); + if (widecio) + fprintf(fout, "# define output(c) (void)putwc(c,yyout)\n"); + else + fprintf(fout, "# define output(c) (void)putc(c,yyout)\n"); + fprintf(fout, "#else\n"); + if (widecio) + fprintf(fout, "# define lex_output(c) (void)putwc(c,yyout)\n"); + else + fprintf(fout, "# define lex_output(c) (void)putc(c,yyout)\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "\n#if defined(__cplusplus) || defined(__STDC__)\n"); + fprintf(fout, "\n#if defined(__cplusplus) && defined(__EXTERN_C__)\n"); + fprintf(fout, "extern \"C\" {\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "\tint yyback(int *, int);\n"); /* ? */ + fprintf(fout, "\tint yyinput(void);\n"); /* ? */ + fprintf(fout, "\tint yylook(void);\n"); /* ? */ + fprintf(fout, "\tvoid yyoutput(int);\n"); /* ? */ + fprintf(fout, "\tint yyracc(int);\n"); /* ? */ + fprintf(fout, "\tint yyreject(void);\n"); /* ? */ + fprintf(fout, "\tvoid yyunput(int);\n"); /* ? 
*/ + fprintf(fout, "\tint yylex(void);\n"); + fprintf(fout, "#ifdef YYLEX_E\n"); + fprintf(fout, "\tvoid yywoutput(wchar_t);\n"); + fprintf(fout, "\twchar_t yywinput(void);\n"); + fprintf(fout, "\tvoid yywunput(wchar_t);\n"); + fprintf(fout, "#endif\n"); + + /* XCU4: type of yyless is int */ + fprintf(fout, "#ifndef yyless\n"); + fprintf(fout, "\tint yyless(int);\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "#ifndef yywrap\n"); + fprintf(fout, "\tint yywrap(void);\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "#ifdef LEXDEBUG\n"); + fprintf(fout, "\tvoid allprint(char);\n"); + fprintf(fout, "\tvoid sprint(char *);\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, + "#if defined(__cplusplus) && defined(__EXTERN_C__)\n"); + fprintf(fout, "}\n"); + fprintf(fout, "#endif\n\n"); + fprintf(fout, "#ifdef __cplusplus\n"); + fprintf(fout, "extern \"C\" {\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "\tvoid exit(int);\n"); + fprintf(fout, "#ifdef __cplusplus\n"); + fprintf(fout, "}\n"); + fprintf(fout, "#endif\n\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, + "# define unput(c)" + " {yytchar= (c);if(yytchar=='\\n')yylineno--;*yysptr++=yytchar;}\n"); + fprintf(fout, "# define yymore() (yymorfg=1)\n"); + if (widecio) { + fprintf(fout, "#ifndef __cplusplus\n"); + fprintf(fout, "%s%d%s\n", +"# define input() (((yytchar=yysptr>yysbuf?U(*--yysptr):yygetwchar())==", + ctable['\n'], +"?(yylineno++,yytchar):yytchar)==EOF?0:yytchar)"); + fprintf(fout, "#else\n"); + fprintf(fout, "%s%d%s\n", +"# define lex_input() (((yytchar=yysptr>yysbuf?U(*--yysptr):yygetwchar())==", + ctable['\n'], +"?(yylineno++,yytchar):yytchar)==EOF?0:yytchar)"); + fprintf(fout, "#endif\n"); + fprintf(fout, + "# define ECHO (void)fprintf(yyout, \"%%ls\",yytext)\n"); + fprintf(fout, + "# define REJECT { nstr = yyreject_w(); goto yyfussy;}\n"); + fprintf(fout, "#define yyless yyless_w\n"); + fprintf(fout, "int yyreject_w(void);\n"); + fprintf(fout, "int yyleng;\n"); + + /* + * XCU4: + * If %array, 
yytext[] contains the token. + * If %pointer, yytext is a pointer to yy_tbuf[]. + */ + + if (isArray) { + fprintf(fout, "#define YYISARRAY\n"); + fprintf(fout, "wchar_t yytext[YYLMAX];\n"); + } else { + fprintf(fout, "wchar_t yy_tbuf[YYLMAX];\n"); + fprintf(fout, "wchar_t * yytext = yy_tbuf;\n"); + fprintf(fout, "int yytextsz = YYLMAX;\n"); + fprintf(fout, "#ifndef YYTEXTSZINC\n"); + fprintf(fout, "#define YYTEXTSZINC 100\n"); + fprintf(fout, "#endif\n"); + } + } else { + fprintf(fout, "#ifndef __cplusplus\n"); + fprintf(fout, "%s%d%s\n", +"# define input() (((yytchar=yysptr>yysbuf?U(*--yysptr):getc(yyin))==", + ctable['\n'], +"?(yylineno++,yytchar):yytchar)==EOF?0:yytchar)"); + fprintf(fout, "#else\n"); + fprintf(fout, "%s%d%s\n", +"# define lex_input() (((yytchar=yysptr>yysbuf?U(*--yysptr):getc(yyin))==", + ctable['\n'], +"?(yylineno++,yytchar):yytchar)==EOF?0:yytchar)"); + fprintf(fout, "#endif\n"); + fprintf(fout, "#define ECHO fprintf(yyout, \"%%s\",yytext)\n"); + if (handleeuc) { + fprintf(fout, +"# define REJECT { nstr = yyreject_e(); goto yyfussy;}\n"); + fprintf(fout, "int yyreject_e(void);\n"); + fprintf(fout, "int yyleng;\n"); + fprintf(fout, "size_t yywleng;\n"); + /* + * XCU4: + * If %array, yytext[] contains the token. + * If %pointer, yytext is a pointer to yy_tbuf[]. 
+ */ + if (isArray) { + fprintf(fout, "#define YYISARRAY\n"); + fprintf(fout, + "unsigned char yytext[YYLMAX*MB_LEN_MAX];\n"); + fprintf(fout, + "wchar_t yywtext[YYLMAX];\n"); + } else { + fprintf(fout, + "wchar_t yy_twbuf[YYLMAX];\n"); + fprintf(fout, + "wchar_t yy_tbuf[YYLMAX*MB_LEN_MAX];\n"); + fprintf(fout, + "unsigned char * yytext =" + "(unsigned char *)yy_tbuf;\n"); + fprintf(fout, + "wchar_t * yywtext = yy_twbuf;\n"); + fprintf(fout, + "int yytextsz = YYLMAX;\n"); + fprintf(fout, "#ifndef YYTEXTSZINC\n"); + fprintf(fout, "#define YYTEXTSZINC 100\n"); + fprintf(fout, "#endif\n"); + } + } else { + fprintf(fout, +"# define REJECT { nstr = yyreject(); goto yyfussy;}\n"); + fprintf(fout, "int yyleng;\n"); + + /* + * XCU4: + * If %array, yytext[] contains the token. + * If %pointer, yytext is a pointer to yy_tbuf[]. + */ + if (isArray) { + fprintf(fout, "#define YYISARRAY\n"); + fprintf(fout, "char yytext[YYLMAX];\n"); + } else { + fprintf(fout, "char yy_tbuf[YYLMAX];\n"); + fprintf(fout, + "char * yytext = yy_tbuf;\n"); + fprintf(fout, + "int yytextsz = YYLMAX;\n"); + fprintf(fout, "#ifndef YYTEXTSZINC\n"); + fprintf(fout, + "#define YYTEXTSZINC 100\n"); + fprintf(fout, "#endif\n"); + } + } + } + fprintf(fout, "int yymorfg;\n"); + if (handleeuc) + fprintf(fout, "extern wchar_t *yysptr, yysbuf[];\n"); + else + fprintf(fout, "extern char *yysptr, yysbuf[];\n"); + fprintf(fout, "int yytchar;\n"); + fprintf(fout, "FILE *yyin = (FILE *)-1, *yyout = (FILE *)-1;\n"); + fprintf(fout, "#if defined (__GNUC__)\n"); + fprintf(fout, + "static void _yyioinit(void) __attribute__ ((constructor));\n"); + fprintf(fout, "#elif defined (__SUNPRO_C)\n"); + fprintf(fout, "#pragma init (_yyioinit)\n"); + fprintf(fout, "#elif defined (__HP_aCC) || defined (__hpux)\n"); + fprintf(fout, "#pragma INIT \"_yyioinit\"\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "static void _yyioinit(void) {\n"); + fprintf(fout, "yyin = stdin; yyout = stdout; }\n"); + fprintf(fout, "extern int 
yylineno;\n"); + fprintf(fout, "struct yysvf { \n"); + fprintf(fout, "\tstruct yywork *yystoff;\n"); + fprintf(fout, "\tstruct yysvf *yyother;\n"); + fprintf(fout, "\tint *yystops;};\n"); + fprintf(fout, "struct yysvf *yyestate;\n"); + fprintf(fout, "extern struct yysvf yysvec[], *yybgin;\n"); +} + +static void +rhd1(void) +{ + fprintf(fout, "integer function yylex(dummy)\n"); + fprintf(fout, "define YYLMAX 200\n"); + fprintf(fout, "define ECHO call yyecho(yytext,yyleng)\n"); + fprintf(fout, + "define REJECT nstr = yyrjct(yytext,yyleng);goto 30998\n"); + fprintf(fout, "integer nstr,yylook,yywrap\n"); + fprintf(fout, "integer yyleng, yytext(YYLMAX)\n"); + fprintf(fout, "common /yyxel/ yyleng, yytext\n"); + fprintf(fout, + "common /yyldat/ yyfnd, yymorf, yyprev, yybgin, yylsp, yylsta\n"); + fprintf(fout, + "integer yyfnd, yymorf, yyprev, yybgin, yylsp, yylsta(YYLMAX)\n"); + fprintf(fout, "for(;;){\n"); + fprintf(fout, "\t30999 nstr = yylook(dummy)\n"); + fprintf(fout, "\tgoto 30998\n"); + fprintf(fout, "\t30000 k = yywrap(dummy)\n"); + fprintf(fout, "\tif(k .ne. 0){\n"); + fprintf(fout, "\tyylex=0; return; }\n"); + fprintf(fout, "\t\telse goto 30998\n"); +} + +void +phead2(void) +{ + if (!ratfor) + chd2(); +} + +static void +chd2(void) +{ + fprintf(fout, "if (yyin == (FILE *)-1) yyin = stdin;\n"); + fprintf(fout, "if (yyout == (FILE *)-1) yyout = stdout;\n"); + fprintf(fout, "#if defined (__cplusplus) || defined (__GNUC__)\n"); + fprintf(fout, + "/* to avoid CC and lint complaining yyfussy not being used ...*/\n"); + fprintf(fout, "{static int __lex_hack = 0;\n"); + fprintf(fout, "if (__lex_hack) { yyprevious = 0; goto yyfussy; } }\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "while((nstr = yylook()) >= 0)\n"); + fprintf(fout, "yyfussy: switch(nstr){\n"); + fprintf(fout, "case 0:\n"); + fprintf(fout, "if(yywrap()) return(0); break;\n"); +} + +void +ptail(void) +{ + if (!pflag) + ratfor ? 
rtail() : ctail(); + pflag = 1; +} + +static void +ctail(void) +{ + fprintf(fout, "case -1:\nbreak;\n"); /* for reject */ + fprintf(fout, "default:\n"); + fprintf(fout, + "(void)fprintf(yyout,\"bad switch yylook %%d\",nstr);\n"); + fprintf(fout, "} return(0); }\n"); + fprintf(fout, "/* end of yylex */\n"); +} + +static void +rtail(void) +{ + int i; + fprintf(fout, + "\n30998 if(nstr .lt. 0 .or. nstr .gt. %d)goto 30999\n", casecount); + fprintf(fout, "nstr = nstr + 1\n"); + fprintf(fout, "goto(\n"); + for (i = 0; i < casecount; i++) + fprintf(fout, "%d,\n", 30000+i); + fprintf(fout, "30999),nstr\n"); + fprintf(fout, "30997 continue\n"); + fprintf(fout, "}\nend\n"); +} + +void +statistics(void) +{ + fprintf(errorf, +"%d/%d nodes(%%e), %d/%d positions(%%p), %d/%d (%%n), %ld transitions,\n", + tptr, treesize, nxtpos-positions, maxpos, stnum + 1, nstates, rcount); + fprintf(errorf, + "%d/%d packed char classes(%%k), ", pcptr-pchar, pchlen); + if (optim) + fprintf(errorf, + " %d/%d packed transitions(%%a), ", nptr, ntrans); + fprintf(errorf, " %d/%d output slots(%%o)", yytop, outsize); + putc('\n', errorf); +} diff --git a/lex/ldefs.c b/lex/ldefs.c @@ -0,0 +1,309 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "ldefs.c 6.16 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)ldefs.c 1.7 (gritter) 4/14/07 + */ + +#include <stdio.h> +#include <stdlib.h> +#include <inttypes.h> + +#ifdef __sun +#include <widec.h> +#endif +#include <wctype.h> + +#define CHR wchar_t +#define BYTE char +#define Boolean char +#define LONG_WCHAR_T 1 + +#define PP 1 +#ifdef u370 +#define CWIDTH 8 +#define CMASK 0377 +#define ASCII 1 +#else + +#ifdef unix +#define CWIDTH 7 +#define CMASK 0177 +#define ASCII 1 +#endif + +#ifdef gcos +#define CWIDTH 9 +#define CMASK 0777 +#define ASCII 1 +#endif + +#ifdef ibm +#define CWIDTH 8 +#define CMASK 0377 +#define EBCDIC 1 +#endif +#endif + +#define NCH 256 +#define TOKENSIZE 10000 +#define DEFSIZE 1000 +#define DEFCHAR 2000 +#define BUF_SIZ 2000 +#define STARTCHAR 2560 +#define STARTSIZE 256 +#define CCLSIZE 20000 + + +#ifdef SMALL +#define TREESIZE 600 +#define NTRANS 1500 +#define NSTATES 300 +#define MAXPOS 1500 +#define MAXPOSSTATE 500 +#define NOUTPUT 1500 +#endif + +#ifndef SMALL +#define TREESIZE 1000 +#define NSTATES 500 +#define MAXPOS 2500 +#define MAXPOSSTATE 4*1000 +#define NTRANS 2000 +#define NOUTPUT 4*3000 +#endif +#define NACTIONS 4*1000 +#define ALITTLEEXTRA 300 + +#define RCCL 0x4000 +#define RNCCL 0x4001 +#define RSTR 0x4002 +#define RSCON 0x4003 +/* XCU4: add RXSCON */ +#define RXSCON 0x4011 +#define RNEWE 0x4004 +#define FINAL 0x4005 +#define RNULLS 0x4006 +#define RCAT 0x4007 +#define STAR 0x4008 +#define PLUS 0x4009 +#define QUEST 0x400a +#define 
DIV 0x400b +#define BAR 0x400c +#define CARAT 0x400d +#define S1FINAL 0x400e +#define S2FINAL 0x400f +#define DOT 0x4010 +/* argument is parenthesized so that expression arguments bind as intended */ +#define ISOPERATOR(n) (((n) & 0xc080) == 0x4000) + +/* + * New to JLE; this is not really a node tag. + * This is used in a string pointed to by + * the leaf of an RCCL or RNCCL node as a + * special prefix code that substitutes + * the infix '-' range operator. For + * example, a lex character class "[_0-9a-zA-Z]" + * would be translated to the intermediate + * form: + * RCCL + * | + * | + * v + * "_<RANGE>09<RANGE>a-z<RANGE>A-Z" + */ +#define RANGE 0x40ff + +#define MAXNCG 1000 +extern int ncgidtbl; +extern int ncg; /* ncg == ncgidtbl * 2 */ +typedef unsigned long lchar; +extern lchar yycgidtbl[]; +extern int yycgid(wchar_t); +extern Boolean handleeuc; /* TRUE iff -w or -e option is specified. */ +extern Boolean widecio; /* TRUE iff -w option is specified. */ + +#define DEFSECTION 1 +#define RULESECTION 2 +#define ENDSECTION 5 + +#define PC 1 +#define PS 1 + +#ifdef DEBUG +#define LINESIZE 110 +extern int yydebug; +extern int debug; /* 1 = on */ +extern int charc; +#endif + +#ifndef DEBUG +#define freturn(s) s +#endif + +#undef FALSE +#undef TRUE +enum { + FALSE, + TRUE +}; + + +extern int optind; +extern int no_input; +extern int sargc; +extern char **sargv; +extern char *v_stmp; +extern char *release_string; +extern CHR buf[]; +extern int ratfor; /* 1 = ratfor, 0 = C */ +extern int fatal; +extern int n_error; +extern int copy_line; +extern int yyline; /* line number of file */ +extern int sect; +extern int eof; +extern int lgatflg; +extern int divflg; +extern int funcflag; +extern int pflag; +extern int casecount; +extern int chset; /* 1 = CHR set modified */ +extern FILE *fin, *fout, *fother, *errorf; +extern int fptr; +extern char *ratname, *cname; +extern int prev; /* previous input character */ +extern int pres; /* present input character */ +extern int peek; /* next input character */ +extern int *name; +extern intptr_t *left; +extern 
intptr_t *right; +extern int *parent; +extern Boolean *nullstr; +extern int tptr; +extern CHR pushc[TOKENSIZE]; +extern CHR *pushptr; +extern CHR slist[STARTSIZE]; +extern CHR *slptr; +extern CHR **def, **subs, *dchar; +extern CHR **sname, *schar; +/* XCU4: %x exclusive start */ +extern int *exclusive; +extern CHR *ccl; +extern CHR *ccptr; +extern CHR *dp, *sp; +extern int dptr, sptr; +extern CHR *bptr; /* store input position */ +extern CHR *tmpstat; +extern int count; +extern int **foll; +extern int *nxtpos; +extern int *positions; +extern int *gotof; +extern int *nexts; +extern CHR *nchar; +extern int **state; +extern int *sfall; /* fallback state num */ +extern Boolean *cpackflg; /* true if state has been character packed */ +extern int *atable, aptr; +extern int nptr; +extern Boolean symbol[MAXNCG]; +extern CHR cindex[MAXNCG]; +extern int xstate; +extern int stnum; +extern int ctable[]; +extern int ZCH; +extern int ccount; +extern CHR match[MAXNCG]; +extern BYTE extra[]; +extern CHR *pcptr, *pchar; +extern int pchlen; +extern int nstates, maxpos; +extern int yytop; +extern int report; +extern int ntrans, treesize, outsize; +extern long rcount; +extern int optim; +extern int *verify, *advance, *stoff; +extern int scon; +extern CHR *psave; +extern CHR *getl(CHR *); +extern void *myalloc(int, int); + +void phead1(void); +void phead2(void); +void ptail(void); +void statistics(void); +void error_tail(void); +void error(const char *, ...); +void warning(const char *, ...); +void lgate(void); +void scopy(CHR *s, CHR *t); +void cclinter(int sw); +void cpycom(CHR *p); +void munput(int t, CHR *p); +void cfoll(int v); +void cgoto(void); +void mkmatch(void); +void layout(void); +void remch(wchar_t c); +void sortcgidtbl(void); +void repbycgid(void); +int gch(void); +int slength(CHR *s); +int yyparse(void); +int scomp(CHR *x, CHR *y); +int space(int ch); +int siconv(CHR *t); +int digit(int c); +int ctrans(CHR **ss); +int cpyact(void); +int lookup(CHR *s, CHR **t); +int 
usescape(int c); +int alpha(int c); +int mn2(int a, intptr_t d, intptr_t c); +int mn1(int a, intptr_t d); +int mn0(int a); +int dupl(int n); + +#undef getwc +#define getwc(f) lex_getwc(f) +extern wint_t lex_getwc(FILE *); +#undef putwc +#define putwc(c, f) lex_putwc(c, f) +extern wint_t lex_putwc(wchar_t, FILE *); + +#undef index +#define index lex_index + +extern int isArray; /* XCU4: for %array %pointer */ diff --git a/lex/lex.1 b/lex/lex.1 @@ -0,0 +1,131 @@ +.\" +.\" Sccsid @(#)lex.1 1.5 (gritter) 11/27/05 +.\" Derived from lex(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.TH LEX 1 "11/27/05" "Heirloom Development Tools" "User Commands" +.SH NAME +lex \- generator of lexical analysis programs +.SH SYNOPSIS +.HP +.ad l +.nh +\fBlex\fR [\fB\-ctvnVew\fR] [\fB\-Q\fR(\fBy\fR|\fBn\fR)] +[\fB\-Y\ \fIdirectory\fR] +[\fIfiles\fR\ ...] +.br +.ad b +.SH DESCRIPTION +.I Lex +generates programs to be used in simple lexical analysis of text. +The input +.I files +(standard input default) +contain regular expressions +to be searched for, and actions written in C to be executed when +expressions are found. +.PP +A C source program, `lex.yy.c' is generated, to be compiled thus: +.IP +cc lex.yy.c \-ll +.LP +This program, when run, copies unrecognized portions of +the input to the output, +and executes the associated +C action for each regular expression that is recognized. +.PP +The following +.I lex +program converts upper case to lower, +removes blanks at the end of lines, +and replaces multiple blanks by single blanks. +.IP "" +.nf +.ta \w'[A\-Z] 'u +%% +[A\-Z] putchar(yytext[0]+\'a\'\-\'A\'); +[ ]+$ +[ ]+ putchar(\' \'); +.fi +.PP +The options have the following meanings. +.TP +.B \-c +Generate output in the `C' language. +This is the default. +.TP +.B \-e +Generates output that can handle multibyte characters, +with \fIyytext[]\fR being of type \fIunsigned char[]\fR. +This option is an extension. +.TP +.B \-n +Opposite of +.BR \-v ; +.B \-n +is default. 
+.TP +\fB\-Q\fR(\fBy\fR|\fBn\fR) +With +.BR \-Qy , +a version identification variable is put into lex.yy.c. +With +.B \-Qn +(the default), no such variable is generated. +.TP +.B \-t +Place the result on the standard output instead of in file +`lex.yy.c'. +.TP +.B \-v +Print a one-line summary of statistics of the generated analyzer. +.TP +.B \-V +Causes version information for +.I lex +to be printed. +.TP +.B \-w +Generates output that can handle multibyte characters, +with \fIyytext[]\fR being of type \fIwchar_t[]\fR. +This option is an extension. +.TP +\fB\-Y \fIdirectory\fR +Use `\fIdirectory\fR' to locate driver files, +instead of the default `/usr/ccs/lib/lex'. +This option is an extension. +.SH "SEE ALSO" +yacc(1) +.br +M. E. Lesk and E. Schmidt, +.I LEX \- Lexical Analyzer Generator diff --git a/lex/libmain.c b/lex/libmain.c @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1989 AT&T */ +/* All Rights Reserved */ + + +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* from OpenSolaris "libmain.c 6.6 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)libmain.c 1.4 (gritter) 11/26/05 + */ + +#include "stdio.h" + +extern int yylex(void); +/* + * Default main() shipped in the lex library: calls yylex() once and + * returns 0. The value returned by yylex() is ignored. + */ +int +main(void) +{ + yylex(); + return (0); +} diff --git a/lex/lsearch.c b/lex/lsearch.c @@ -0,0 +1,71 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* from OpenSolaris "lsearch.c 1.15 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)lsearch.c 1.4 (gritter) 11/26/05 + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +/* + * Linear search algorithm, generalized from Knuth (6.1) Algorithm Q. + * + * This version no longer has anything to do with Knuth's Algorithm Q, + * which first copies the new element into the table, then looks for it. 
+ * The assumption there was that the cost of checking for the end of the + * table before each comparison outweighed the cost of the comparison, which + * isn't true when an arbitrary comparison function must be called and when the + * copy itself takes a significant number of cycles. + * Actually, it has now reverted to Algorithm S, which is "simpler." + */ + +#include <sys/types.h> +#include <stddef.h> +#include <string.h> +#include "search.h" +/* + * xlsearch -- linear search with insertion. + * + * Walks the *nelp elements of width bytes each that start at bs, calling + * compar(ky, element) on each in order, and returns a pointer to the first + * element for which compar() returns 0. If none matches, *nelp is + * incremented, width bytes are copied from ky into the slot just past the + * old end of the table, and a pointer to that slot is returned. + * No capacity check is made here, so the table must have room for one + * more element. + */ +void * +xlsearch(const void *ky, void *bs, unsigned *nelp, unsigned width, + int (*compar)(const void *, const void *)) +{ + char *key = (char *)ky; + char *base = (char *)bs; + char *next = base + *nelp * width; /* End of table */ + void *res; + + for (; base < next; base += width) + if ((*compar)(key, base) == 0) + return (base); /* Key found */ + ++*nelp; /* Not found, add to table */ + res = memcpy(base, key, width); /* base now == next */ + return (res); +} diff --git a/lex/main.c b/lex/main.c @@ -0,0 +1,364 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. 
+ */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* Copyright 1976, Bell Telephone Laboratories, Inc. */ + +/* from OpenSolaris "main.c 6.16 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)main.c 1.9 (gritter) 11/26/05 + */ + +#include <string.h> +#include "once.h" +#include "sgs.h" +#include <locale.h> +#include <limits.h> +#include <unistd.h> + +static wchar_t L_INITIAL[] = {'I', 'N', 'I', 'T', 'I', 'A', 'L', 0}; + +static void get1core(void); +static void free1core(void); +static void get2core(void); +static void free2core(void); +static void get3core(void); +#ifdef DEBUG +static void free3core(void); +#endif + +int +main(int argc, char **argv) +{ + int i; + int c; + char *path = NULL; + Boolean eoption = 0, woption = 0; + + sargv = argv; + sargc = argc; + errorf = stderr; + setlocale(LC_CTYPE, ""); +#ifdef DEBUG + while ((c = getopt(argc, argv, "dyctvnewVQ:Y:")) != EOF) { +#else + while ((c = getopt(argc, argv, "ctvnewVQ:Y:")) != EOF) { +#endif + switch (c) { +#ifdef DEBUG + case 'd': + debug++; + break; + case 'y': + yydebug = TRUE; + break; +#endif + case 'V': + fprintf(stderr, "lex:%s , %s\n", pkg, rel); + break; + case 'Q': + v_stmp = optarg; + if (*v_stmp != 'y' && *v_stmp != 'n') + error( + "lex: -Q should be followed by [y/n]"); + break; + case 'Y': + path = malloc(strlen(optarg) + + sizeof ("/nceucform") + 1); + path = strcpy(path, optarg); + break; + case 'c': + ratfor = FALSE; + break; + case 't': + fout = stdout; + break; + case 'v': + report = 1; + break; + case 'n': + report = 0; + break; + case 'w': + case 'W': + woption = 1; + handleeuc = 1; + widecio = 1; + break; + case 'e': + case 'E': + eoption = 1; + handleeuc = 1; + widecio = 0; + break; + default: + fprintf(stderr, + "Usage: lex [-ctvnV] [-Q(y/n)] [files...]\n"); + exit(1); + } + } + if (woption && eoption) { + error( + "You may not specify both -w and -e simultaneously."); + } + no_input = argc - optind; 
+ if (no_input) { + /* XCU4: recognize "-" file operand for stdin */ + if (strcmp(argv[optind], "-") == 0) + fin = stdin; + else { + fin = fopen(argv[optind], "r"); + if (fin == NULL) + error( + "Can't open input file -- %s", argv[optind]); + } + } else + fin = stdin; + + /* may be gotten: def, subs, sname, schar, ccl, dchar */ + gch(); + + /* may be gotten: name, left, right, nullstr, parent */ + get1core(); + + scopy(L_INITIAL, sp); + sname[0] = sp; + sp += slength(L_INITIAL) + 1; + sname[1] = 0; + + /* XCU4: %x exclusive start */ + exclusive[0] = 0; + + if (!handleeuc) { + /* + * Set ZCH and ncg to their default values + * as they may be needed to handle %t directive. + */ + ZCH = ncg = NCH; /* ncg behaves as constant in this mode. */ + } + + /* may be disposed of: def, subs, dchar */ + if (yyparse()) + exit(1); /* error return code */ + + if (handleeuc) { + ncg = ncgidtbl * 2; + ZCH = ncg; + if (ncg >= MAXNCG) + error( + "Too complex rules -- requires too many char groups."); + sortcgidtbl(); + } + repbycgid(); /* Call this even in ASCII compat. mode. 
*/ + + /* + * maybe get: + * tmpstat, foll, positions, gotof, nexts, + * nchar, state, atable, sfall, cpackflg + */ + free1core(); + get2core(); + ptail(); + mkmatch(); +#ifdef DEBUG + if (debug) + pccl(); +#endif + sect = ENDSECTION; + if (tptr > 0) + cfoll(tptr-1); +#ifdef DEBUG + if (debug) + pfoll(); +#endif + cgoto(); +#ifdef DEBUG + if (debug) { + printf("Print %d states:\n", stnum + 1); + for (i = 0; i <= stnum; i++) + stprt(i); + } +#endif + /* + * may be disposed of: + * positions, tmpstat, foll, state, name, + * left, right, parent, ccl, schar, sname + * maybe get: verify, advance, stoff + */ + free2core(); + get3core(); + layout(); + /* + * may be disposed of: + * verify, advance, stoff, nexts, nchar, + * gotof, atable, ccpackflg, sfall + */ + +#ifdef DEBUG + free3core(); +#endif + if (path == NULL) { + static char formpath[sizeof FORMPATH + 20] = FORMPATH; + path = formpath; + } + + if (handleeuc) { + if (ratfor) + error("Ratfor is not supported by -w or -e option."); + strcat(path, "/nceucform"); + } + else + strcat(path, ratfor ? 
"/nrform" : "/ncform"); + + fother = fopen(path, "r"); + if (fother == NULL) + error("Lex driver missing, file %s", path); + while ((i = getc(fother)) != EOF) + putc(i, fout); + fclose(fother); + fclose(fout); + if (report == 1) + statistics(); + fclose(stdout); + fclose(stderr); + return (0); /* success return code */ +} + +static void +get1core(void) +{ + ccptr = ccl = myalloc(CCLSIZE, sizeof (*ccl)); + pcptr = pchar = myalloc(pchlen, sizeof (*pchar)); + def = myalloc(DEFSIZE, sizeof (*def)); + subs = myalloc(DEFSIZE, sizeof (*subs)); + dp = dchar = myalloc(DEFCHAR, sizeof (*dchar)); + sname = myalloc(STARTSIZE, sizeof (*sname)); + /* XCU4: exclusive start array */ + exclusive = myalloc(STARTSIZE, sizeof (*exclusive)); + sp = schar = myalloc(STARTCHAR, sizeof (*schar)); + if (ccl == 0 || def == 0 || + pchar == 0 || subs == 0 || dchar == 0 || + sname == 0 || exclusive == 0 || schar == 0) + error("Too little core to begin"); +} + +static void +free1core(void) +{ + free(def); + free(subs); + free(dchar); +} + +static void +get2core(void) +{ + int i; + gotof = myalloc(nstates, sizeof (*gotof)); + nexts = myalloc(ntrans, sizeof (*nexts)); + nchar = myalloc(ntrans, sizeof (*nchar)); + state = myalloc(nstates, sizeof (*state)); + atable =myalloc(nstates, sizeof (*atable)); + sfall = myalloc(nstates, sizeof (*sfall)); + cpackflg = myalloc(nstates, sizeof (*cpackflg)); + tmpstat = myalloc(tptr+1, sizeof (*tmpstat)); + foll = myalloc(tptr+1, sizeof (*foll)); + nxtpos = positions = myalloc(maxpos, sizeof (*positions)); + if (tmpstat == 0 || foll == 0 || positions == 0 || + gotof == 0 || nexts == 0 || nchar == 0 || + state == 0 || atable == 0 || sfall == 0 || cpackflg == 0) + error("Too little core for state generation"); + for (i = 0; i <= tptr; i++) + foll[i] = 0; +} + +static void +free2core(void) +{ + free(positions); + free(tmpstat); + free(foll); + free(name); + free(left); + free(right); + free(parent); + free(nullstr); + free(state); + free(sname); + /* XCU4: 
exclusive start array */ + free(exclusive); + free(schar); + free(ccl); +} + +static void +get3core(void) +{ + verify = myalloc(outsize, sizeof (*verify)); + advance = myalloc(outsize, sizeof (*advance)); + stoff = myalloc(stnum+2, sizeof (*stoff)); + if (verify == 0 || advance == 0 || stoff == 0) + error("Too little core for final packing"); +} + +#ifdef DEBUG +static void +free3core(void) +{ + free(advance); + free(verify); + free(stoff); + free(gotof); + free(nexts); + free(nchar); + free(atable); + free(sfall); + free(cpackflg); +} +#endif + +void * +myalloc(int a, int b) +{ + void *i; + i = calloc(a, b); + if (i == NULL) + warning("calloc returns a 0"); + return (i); +} + +void +yyerror(char *s) +{ + fprintf(stderr, + "\"%s\":line %d: Error: %s\n", sargv[optind], yyline, s); +} diff --git a/lex/mkfile b/lex/mkfile @@ -0,0 +1,26 @@ +BIN = lex +LIB = libl.a +OBJ = main.o sub1.o sub2.o sub3.o header.o wcio.o parser.o getopt.o lsearch.o +LOBJ = allprint.o libmain.o reject.o yyless.o yywrap.o \ + allprint_w.o reject_w.o yyless_w.o reject_e.o yyless_e.o +LOCAL_CFLAGS = -DFORMPATH=\"$LIBDIR/lex\" +CLEAN_FILES = parser.c +INSTALL_BIN = lex +INSTALL_MAN1 = lex.1 +INSTALL_OTHER1 = nceucform ncform nrform +INSTALL_OTHER1_DIR = $LIBDIR/lex +DEPS = yacc + +WFLAGS = -DEUC -DJLSLEX -DWOPTION +EFLAGS = -DEUC -DJLSLEX -DEOPTION + +<$mkbuild/mk.default + +%_w.o:Q: %.c + echo CC $target + $CC -c $CFLAGS $CPPFLAGS $WFLAGS $prereq -o $target + +%_e.o:Q: %.c + echo CC $target + $CC -c $CFLAGS $CPPFLAGS $EFLAGS $prereq -o $target + diff --git a/lex/nceucform b/lex/nceucform @@ -0,0 +1,480 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1989 AT&T */ +/* All Rights Reserved */ + + +/* from OpenSolaris "nceucform 1.17 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)nceucform 1.6 (gritter) 11/18/05 + */ + +#if defined(__cplusplus) || defined(__STDC__) +int yycgid(wchar_t); +wint_t yygetwchar(void); +#endif +int yylineno =1; +int yygid; +#define LONG_WCHAR_T 1 +# define YYU(x) x +# define NLSTATE yyprevious=YYNEWLINE +wchar_t yysbuf[YYLMAX]; +wchar_t *yysptr = yysbuf; +struct yysvf *yylstate [YYLMAX], **yylsp, **yyolsp; +int *yyfnd; +extern struct yysvf *yyestate; +int yyprevious = YYNEWLINE; +#if defined(__cplusplus) || defined(__STDC__) +int yylook(void) +#else +yylook() +#endif +{ + struct yysvf *yystate, **lsp; + struct yywork *yyt; + struct yysvf *yyz; + int yych, yyfirst; + struct yywork *yyr; +# ifdef LEXDEBUG + int debug; +# endif + wchar_t *yylastch; + /* start off machines */ +# ifdef LEXDEBUG + debug = 0; +# endif + yyfirst=1; + if (!yymorfg) + yylastch = YYTEXT; + else { + yymorfg=0; + yylastch = YYTEXT+YYLENG; + } + for(;;){ + lsp = yylstate; + yyestate = yystate = yybgin; + if (yyprevious==YYNEWLINE) yystate++; + for (;;){ +# ifdef LEXDEBUG + if(debug)fprintf(yyout,"state %d\n",yystate-yysvec-1); +# endif + yyt = yystate->yystoff; + if(yyt == yycrank && !yyfirst){ /* may not be any transitions */ + yyz = 
yystate->yyother; + if(yyz == 0)break; + if(yyz->yystoff == yycrank)break; + } + *yylastch++ = yych = YYINPUT(); +#ifdef YYISARRAY + if(yylastch > &YYTEXT[YYLMAX]) { + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } +#else + if (yylastch >= &YYTEXT[ yytextsz ]) { + int x = yylastch - YYTEXT; + + yytextsz += YYTEXTSZINC; +#ifdef YYLEX_E /* -e */ + if (YYTEXT == yy_twbuf) { + YYTEXT = (wchar_t *) + malloc(yytextsz * + sizeof (wchar_t)); + memcpy(YYTEXT, yy_twbuf, + sizeof (yy_twbuf)); + yytext = (wchar_t *) + malloc(yytextsz * + sizeof (wchar_t)); + memcpy(yytext, yy_tbuf, + sizeof (yy_tbuf)); +#else + if (YYTEXT == yy_tbuf) { + YYTEXT = (wchar_t *) + malloc(yytextsz * + sizeof (wchar_t)); + memcpy(YYTEXT, yy_tbuf, + sizeof (yy_tbuf)); +#endif + } + else { + YYTEXT = (wchar_t *) + realloc(YYTEXT, yytextsz); +#ifdef YYLEX_E /* -e */ + yytext = (wchar_t *) + realloc(yytext, + yytextsz * sizeof (wchar_t)); +#endif + } + if (!YYTEXT) { + fprintf(yyout, + "Cannot realloc YYTEXT\n"); + exit(1); + } + yylastch = YYTEXT + x; + } +#endif + yygid = yycgid(yych); + yyfirst=0; + tryagain: +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"wchar_t "); + allprint(yych); + fprintf(yyout," gid %d\n", yygid); + } +# endif + yyr = yyt; + if ( yyt > yycrank){ + yyt = yyr + yygid; + if (yyt <= yytop && yyt->verify+yysvec == yystate){ + if(yyt->advance+yysvec == YYLERR) /* error transitions */ + {YYUNPUT(*--yylastch);break;} + *lsp++ = yystate = yyt->advance+yysvec; + if(lsp > &yylstate[YYLMAX]) { + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } + goto contin; + } + } +# ifdef YYOPTIM + else if(yyt < yycrank) { /* r < yycrank */ + yyt = yyr = yycrank+(yycrank-yyt); +# ifdef LEXDEBUG + if(debug)fprintf(yyout,"compressed state\n"); +# endif + yyt = yyt + yygid; + if(yyt <= yytop && yyt->verify+yysvec == yystate){ + if(yyt->advance+yysvec == YYLERR) /* error transitions */ + {YYUNPUT(*--yylastch);break;} + *lsp++ = yystate = 
yyt->advance+yysvec; + if(lsp > &yylstate[YYLMAX]) { + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } + goto contin; + } + yyt = yyr + YYU(yymatch[yygid]); +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"try fall back character "); + allprint_w(YYU(yymatch[yygid])); + fprintf(yyout," gid %d\n", yygid); + } +# endif + if(yyt <= yytop && yyt->verify+yysvec == yystate){ + if(yyt->advance+yysvec == YYLERR) /* error transition */ + {YYUNPUT(*--yylastch);break;} + *lsp++ = yystate = yyt->advance+yysvec; + if(lsp > &yylstate[YYLMAX]) { + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } + goto contin; + } + } + if ((yystate = yystate->yyother) && (yyt= yystate->yystoff) != yycrank){ +# ifdef LEXDEBUG + if(debug)fprintf(yyout,"fall back to state %d\n",yystate-yysvec-1); +# endif + goto tryagain; + } +# endif + else + {YYUNPUT(*--yylastch);break;} + contin: +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"state %d wchar_t ",yystate-yysvec-1); + allprint_w(yych); + fprintf(yyout," gid %d\n", yygid); + } +# endif + ; + } +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"stopped at %d with ",*(lsp-1)-yysvec-1); + allprint_w(yych); + fprintf(yyout," gid %d\n", yygid); + } +# endif + while (lsp-- > yylstate){ + *yylastch-- = 0; + if (*lsp != 0 && (yyfnd= (*lsp)->yystops) && *yyfnd > 0){ + yyolsp = lsp; + if(yyextra[*yyfnd]){ /* must backup */ + while(yyback((*lsp)->yystops,-*yyfnd) != 1 && lsp > yylstate){ + lsp--; + YYUNPUT(*yylastch--); + } + } + yyprevious = YYU(*yylastch); + yylsp = lsp; + YYLENG = yylastch-YYTEXT+1; + YYTEXT[YYLENG] = 0; +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"\nmatch "); + sprint_w(YYTEXT); + fprintf(yyout," action %d\n",*yyfnd); + } +# endif +#ifdef YYLEX_E /* -e */ + yyleng=wcstombs((char *)yytext, YYTEXT, sizeof(yytext)-1); +#ifdef LEXDEBUG + if(yyleng>=sizeof(yytext)-1) + fprintf(yyout, "yytext[] too short\n"); +#endif +#endif + return(*yyfnd++); + } + YYUNPUT(*yylastch); + } + if (YYTEXT[0] == 0 
/* && feof(yyin) */) + { + yysptr=yysbuf; + return(0); + } + yyprevious = YYTEXT[0] = YYINPUT(); + if (yyprevious>0) + YYOUTPUT(yyprevious); + yylastch=YYTEXT; +# ifdef LEXDEBUG + if(debug)putchar('\n'); +# endif + } + } +int +#if defined(__cplusplus) || defined(__STDC__) +yyback(int *p, int m) +#else +yyback(p, m) + int *p; + int m; +#endif +{ + if (p==0) return(0); + while (*p) { + if (*p++ == m) + return(1); + } + return(0); +} + +#ifdef YYLEX_E /* -e */ +wchar_t +#if defined(__cplusplus) || defined(__STDC__) +yywinput(void){ +#else +yywinput(){ +#endif + static mbstate_t state; + wchar_t wc; + char b; + int n; + + for (;;) { + do { +#ifndef __cplusplus + b=input(); +#else + b=lex_input(); +#endif + } while ((n = mbrtowc(&wc, &b, 1, &state)) == (size_t)-2); + if (n == (size_t)-1) { + for (n = 0; n < sizeof state; n++) + ((char *)&state)[n] = 0; + continue; + } + break; + } + + return wc; +} + +#if defined(__cplusplus) || defined(__STDC__) +void +yywoutput(wchar_t wc) +#else +yywoutput(wc) + wchar_t wc; +#endif +{ + unsigned char eucbuf[MB_LEN_MAX]; + int n; + unsigned char *p=eucbuf; + + n=wctomb( (char *)eucbuf, wc ); +#ifndef __cplusplus + while(n-->0) output(*p++); +#else + while(n-->0) lex_output(*p++); +#endif +} + +#if defined(__cplusplus) || defined(__STDC__) +void +yywunput(wchar_t wc) +#else +yywunput(wc) + wchar_t wc; +#endif +{ + unsigned char eucbuf[MB_LEN_MAX]; + int n; + unsigned char *p; + + n=wctomb( (char *)eucbuf, wc ); + p=eucbuf+n; + while(n-->0) unput(*--p); +} +#endif + +#ifdef LONG_WCHAR_T +#define yylinearize(lc) lc +#else/*!LONG_WCHAR_T*/ +unsigned long +yylinearize(wc) + wchar_t wc; +{ + unsigned long prefix; + switch(wc&0x8080){ + case 0x0000: prefix=0x00000000; break; + case 0x0080: prefix=0x20000000; break; + case 0x8000: prefix=0x40000000; break; + case 0x8080: prefix=0x60000000; break; + } + return prefix|wc; +} +#endif/*!LONG_WCHAR_T*/ +int +yycgid(c) + wchar_t c; +{ + int first = 0; + int last = YYNCGIDTBL - 1; + unsigned long 
lc=yylinearize(c); + + if( yycgidtbl[YYNCGIDTBL-1] < lc ) return YYNCGIDTBL*2-1; + + while (last >= 0) { + int i = (first+last)/2; + if (lc == yycgidtbl[i]) + return (2*i); + else if ( yycgidtbl[i]<lc) { + if (lc<yycgidtbl[i+1]) + return (2*i+1); + else + first = i + 1; + }else + last = i - 1; + } + return 0; /*Error*/ +} + + + + + + /* the following are only used in the lex library */ +int +#if defined(__cplusplus) || defined(__STDC__) +yyinput(void) +#else +yyinput() +#endif +{ +#ifndef __cplusplus + return(input()); +#else + return(lex_input()); +#endif + } +#if defined(__cplusplus) || defined(__STDC__) +void +yyoutput(int c) +#else +yyoutput(c) + int c; +#endif +{ +#ifndef __cplusplus + output(c); +#else + lex_output(c); +#endif + } +#if defined(__cplusplus) || defined(__STDC__) +void +yyunput(int c) +#else +yyunput(c) + int c; +#endif +{ + unput(c); + } + +#ifndef YYLEX_E +/* + * Just strip all illegal byte sequences from input. + */ +wint_t +#if defined(__cplusplus) || defined(__STDC__) +yygetwchar(void) +#else +yygetwchar() +#endif +{ + static mbstate_t state; + char b; + int c, n; + wchar_t wc = WEOF; + + for (;;) { + while ((c = getc(yyin)) != EOF) { + b = c; + if ((n = mbrtowc(&wc, &b, 1, &state)) != (size_t)-2) + break; + } + if (n == (size_t)-1 || n == (size_t)-2) { + if (c == EOF) + return WEOF; + for (n = 0; n < sizeof state; n++) + ((char *)&state)[n] = 0; + continue; + } + break; + } + return wc; +} +#endif /* !YYLEX_E */ diff --git a/lex/ncform b/lex/ncform @@ -0,0 +1,290 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* Copyright (c) 1989 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "ncform 6.14 05/06/10 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)ncform 1.4 (gritter) 11/18/05 + */ + +int yylineno =1; +# define YYU(x) x +# define NLSTATE yyprevious=YYNEWLINE +struct yysvf *yylstate [YYLMAX], **yylsp, **yyolsp; +char yysbuf[YYLMAX]; +char *yysptr = yysbuf; +int *yyfnd; +extern struct yysvf *yyestate; +int yyprevious = YYNEWLINE; +#if defined(__cplusplus) || defined(__STDC__) +int yylook(void) +#else +yylook() +#endif +{ + register struct yysvf *yystate, **lsp; + register struct yywork *yyt; + struct yysvf *yyz; + int yych, yyfirst; + struct yywork *yyr; +# ifdef LEXDEBUG + int debug; +# endif + char *yylastch; + /* start off machines */ +# ifdef LEXDEBUG + debug = 0; +# endif + yyfirst=1; + if (!yymorfg) + yylastch = yytext; + else { + yymorfg=0; + yylastch = yytext+yyleng; + } + for(;;){ + lsp = yylstate; + yyestate = yystate = yybgin; + if (yyprevious==YYNEWLINE) yystate++; + for (;;){ +# ifdef LEXDEBUG + if(debug)fprintf(yyout,"state %d\n",yystate-yysvec-1); +# endif + yyt = yystate->yystoff; + if(yyt == yycrank && !yyfirst){ /* may not be any transitions */ + yyz = yystate->yyother; + if(yyz == 0)break; + if(yyz->yystoff == yycrank)break; + } +#ifndef __cplusplus + *yylastch++ = yych = input(); +#else + *yylastch++ = yych = lex_input(); +#endif +#ifdef YYISARRAY + if(yylastch > &yytext[YYLMAX]) { + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } +#else + if (yylastch >= &yytext[ yytextsz ]) { + int 
x = yylastch - yytext; + + yytextsz += YYTEXTSZINC; + if (yytext == yy_tbuf) { + yytext = (char *) malloc(yytextsz); + memcpy(yytext, yy_tbuf, sizeof (yy_tbuf)); + } + else + yytext = (char *) realloc(yytext, yytextsz); + if (!yytext) { + fprintf(yyout, + "Cannot realloc yytext\n"); + exit(1); + } + yylastch = yytext + x; + } +#endif + yyfirst=0; + tryagain: +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"char "); + allprint(yych); + putchar('\n'); + } +# endif + yyr = yyt; + if ( yyt > yycrank){ + yyt = yyr + yych; + if (yyt <= yytop && yyt->verify+yysvec == yystate){ + if(yyt->advance+yysvec == YYLERR) /* error transitions */ + {unput(*--yylastch);break;} + *lsp++ = yystate = yyt->advance+yysvec; + if(lsp > &yylstate[YYLMAX]) { + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } + goto contin; + } + } +# ifdef YYOPTIM + else if(yyt < yycrank) { /* r < yycrank */ + yyt = yyr = yycrank+(yycrank-yyt); +# ifdef LEXDEBUG + if(debug)fprintf(yyout,"compressed state\n"); +# endif + yyt = yyt + yych; + if(yyt <= yytop && yyt->verify+yysvec == yystate){ + if(yyt->advance+yysvec == YYLERR) /* error transitions */ + {unput(*--yylastch);break;} + *lsp++ = yystate = yyt->advance+yysvec; + if(lsp > &yylstate[YYLMAX]) { + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } + goto contin; + } + yyt = yyr + YYU(yymatch[yych]); +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"try fall back character "); + allprint(YYU(yymatch[yych])); + putchar('\n'); + } +# endif + if(yyt <= yytop && yyt->verify+yysvec == yystate){ + if(yyt->advance+yysvec == YYLERR) /* error transition */ + {unput(*--yylastch);break;} + *lsp++ = yystate = yyt->advance+yysvec; + if(lsp > &yylstate[YYLMAX]) { + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } + goto contin; + } + } + if ((yystate = yystate->yyother) && (yyt= yystate->yystoff) != yycrank){ +# ifdef LEXDEBUG + if(debug)fprintf(yyout,"fall back to state %d\n",yystate-yysvec-1); +# 
endif + goto tryagain; + } +# endif + else + {unput(*--yylastch);break;} + contin: +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"state %d char ",yystate-yysvec-1); + allprint(yych); + putchar('\n'); + } +# endif + ; + } +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"stopped at %d with ",*(lsp-1)-yysvec-1); + allprint(yych); + putchar('\n'); + } +# endif + while (lsp-- > yylstate){ + *yylastch-- = 0; + if (*lsp != 0 && (yyfnd= (*lsp)->yystops) && *yyfnd > 0){ + yyolsp = lsp; + if(yyextra[*yyfnd]){ /* must backup */ + while(yyback((*lsp)->yystops,-*yyfnd) != 1 && lsp > yylstate){ + lsp--; + unput(*yylastch--); + } + } + yyprevious = YYU(*yylastch); + yylsp = lsp; + yyleng = yylastch-yytext+1; + yytext[yyleng] = 0; +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"\nmatch "); + sprint(yytext); + fprintf(yyout," action %d\n",*yyfnd); + } +# endif + return(*yyfnd++); + } + unput(*yylastch); + } + if (yytext[0] == 0 /* && feof(yyin) */) + { + yysptr=yysbuf; + return(0); + } +#ifndef __cplusplus + yyprevious = yytext[0] = input(); + if (yyprevious>0) + output(yyprevious); +#else + yyprevious = yytext[0] = lex_input(); + if (yyprevious>0) + lex_output(yyprevious); +#endif + yylastch=yytext; +# ifdef LEXDEBUG + if(debug)putchar('\n'); +# endif + } + } +#if defined(__cplusplus) || defined(__STDC__) +int yyback(int *p, int m) +#else +yyback(p, m) + int *p; +#endif +{ + if (p==0) return(0); + while (*p) { + if (*p++ == m) + return(1); + } + return(0); +} + /* the following are only used in the lex library */ +#if defined(__cplusplus) || defined(__STDC__) +int yyinput(void) +#else +yyinput() +#endif +{ +#ifndef __cplusplus + return(input()); +#else + return(lex_input()); +#endif + } +#if defined(__cplusplus) || defined(__STDC__) +void yyoutput(int c) +#else +yyoutput(c) + int c; +#endif +{ +#ifndef __cplusplus + output(c); +#else + lex_output(c); +#endif + } +#if defined(__cplusplus) || defined(__STDC__) +void yyunput(int c) +#else +yyunput(c) + int c; +#endif +{ + unput(c); + 
} diff --git a/lex/nrform b/lex/nrform @@ -0,0 +1,188 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 1993 by Sun Microsystems, Inc. +# + +# from OpenSolaris "nrform 6.7 05/06/08 SMI" + +# +# Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. 
Br., Germany +# +# Sccsid @(#)nrform 1.4 (gritter) 10/20/06 +# + +block data +integer cshift, csize, yynlin +common /yyllib/ cshift, csize, yynlin +data yynlin/YYNEWLINE/ +end +block data +common /yyldat/ yyfnd, ymorf, yyprev, yybgin, yytop +integer yyfnd, yymorf, yyprev, yybgin, yytop +data yybgin/1/ +data yyprev/YYNEWLINE/ +data yytop/YYTOPVAL/ +end +integer function yylook(dummy) +common /Lverif/ verif +common /Ladvan/ advan +common /Lstoff/ stoff +common /Lsfall/ sfall +common /Latable/ atable +common /Lextra/ extra +common /Lvstop/ vstop +integer verif(Sverif), advan(Sadvan),stoff(Sstoff),match(Smatch) +integer sfall(Ssfall),atable(Satable),extra(Sextra), vstop(Svstop) +integer state, lsp, r +integer ch, n +common /yyldat/ yyfnd, yymorf, yyprev, yybgin, yytop, yylsp, yylsta(YYLMAX) +common /yyxel/ yyleng, yytext +integer yyfnd, yymorf, yylsta, yylsp, yytext, yyprev, yyleng, yytop +integer lexshf, yytext(YYLMAX), yyback, yybgin +integer z, t +if (yymorf .eq. 0) + yyleng = 0 +else + yymorf=0 +1776 + lsp = 1 + state = yybgin + if (yyprev .eq. YYNEWLINE) + state = state + 1 + for (;;){ + r = stoff(state) + if (r .eq. 0){ + z = sfall(state) + if (z .eq. 0) + break + if(stoff(z) == 0) break + } + ch = input(dummy) + ich = lexshf(ch) + yyleng = yyleng+1 + yytext(yyleng) = ch + 1984 + if(r .gt. 0){ + t = r + ich + if (t<= yytop){ + if (verif(t) .eq. state){ + if(advan(t) == YYERROR){ + call unput(yytext(yyleng)) + yyleng = yyleng - 1 + break + } + state = advan(t) + yylsta(lsp) = state + lsp = lsp +1 + goto 2001 + } + } + } + if(r < 0){ + t = r + ich + if (t <= yytop .and. verif(t) .eq. 
state){ + if(advan(t) == YYERROR){ + call unput(yytext(yyleng)) + yyleng = yyleng - 1 + break + } + state = advan(t) + yylsta(lsp) = state + lsp = lsp +1 + goto 2001 + } + t = r + match(ich) + if(t <= yytop && state == verif(t)){ + if(advan(t) == YYERROR){ + call unput(yytext(yyleng)) + yyleng = yyleng - 1 + break + } + state = advan(t) + yylsta(lsp) = state + lsp = lsp + 1 + goto 2001 + } + } + else { + if (state > 0) state = sfall(state) + if (state .gt. 0) r = stoff(state) + if (state .gt. 0 .and. r .ne. 0) + goto 1984 + call unput(yytext(yyleng)) + yyleng = yyleng -1 + break + } + 2001 + continue + } + while (lsp .gt. 1){ + lsp = lsp -1 + ilsp = yylsta(lsp) + yyfnd = atable(ilsp) + if (yyfnd .gt. 0) + if (vstop(yyfnd) .gt. 0){ + r = vstop(yyfnd) + if (extra(r) .ne. 0){ + for(;;){ + ilsp = yylsta(lsp) + if (yyback(atable(ilsp), -r) .eq. 1) + break + lsp= lsp -1 + call unput(yytext(yyleng)) + yyleng = yyleng -1 + } + } + yyprev = lexshf(yytext(yyleng)) + yylsp = lsp + yyfnd = yyfnd + 1 + yylook = r + yytext(yyleng+1) = 0 + return + } + call unput(yytext(yyleng)) + } + if (yytext(1) .eq. 0){ + yylook=0 + return + } + yyprev = input(dummy) + call output(yyprev) + yyprev = lexshf(yyprev) + yyleng = 0 + goto 1776 +end +integer function yyback (isub, n) +common /Lvstop/ vstop +integer vstop(Svstop) +if (isub .ne. 0) +while (vstop(isub) .ne. 0){ + if (vstop(isub) .eq. m){ + yyback = 1 + return + } + isub = isub + 1 + } +yyback = 0 +return +end diff --git a/lex/once.h b/lex/once.h @@ -0,0 +1,166 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "once.h 6.9 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)once.h 1.4 (gritter) 11/26/05 + */ + +#include "ldefs.c" + +/* once.c */ + /* because of external definitions, this code should occur only once */ +int ctable[2*NCH] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, + 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, + 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, + 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, + 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, + 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, + 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, + 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, + 230, 231, 232, 233, 234, 235, 236, 
237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, + 250, 251, 252, 253, 254, 255 +}; + +int ZCH = NCH; +FILE *fout = NULL, *errorf; +int sect = DEFSECTION; +int prev = '\n'; /* previous input character */ +int pres = '\n'; /* present input character */ +int peek = '\n'; /* next input character */ +CHR *pushptr = pushc; +CHR *slptr = slist; + +#ifndef CNAME +#define CNAME "./ncform" +#endif +#ifndef RATNAME +#define RATNAME "./nrform" +#endif +char *cname = CNAME; +char *ratname = RATNAME; + +int ccount = 1; +int casecount = 1; +int aptr = 1; +int nstates = NSTATES, maxpos = MAXPOS; +int treesize = TREESIZE, ntrans = NTRANS; +int yytop; +int outsize = NOUTPUT; +int sptr = 1; +int optim = TRUE; +int report = 2; +int debug; /* 1 = on */ +int charc; +char *v_stmp = "n"; +int no_input; +int copy_line; +int n_error = 0; +int fatal = 1; +int sargc; +char **sargv; +CHR buf[BUF_SIZ]; +int ratfor; /* 1 = ratfor, 0 = C */ +int yyline; /* line number of file */ +int eof; +int lgatflg; +int divflg; +int funcflag; +int pflag; +int chset; /* 1 = char set modified */ +FILE *fin, *fother; +int fptr; +int *name; +intptr_t *left; +intptr_t *right; +int *parent; +Boolean *nullstr; +int tptr; +CHR pushc[TOKENSIZE]; +CHR slist[STARTSIZE]; +CHR **def, **subs, *dchar; +/* XCU4: %x exclusive start */ +int *exclusive; +CHR **sname, *schar; +CHR *ccl; +CHR *ccptr; +CHR *dp, *sp; +int dptr; +CHR *bptr; /* store input position */ +CHR *tmpstat; +int count; +int **foll; +int *nxtpos; +int *positions; +int *gotof; +int *nexts; +CHR *nchar; +int **state; +int *sfall; /* fallback state num */ +Boolean *cpackflg; /* true if state has been character packed */ +int *atable; +int nptr; +Boolean symbol[MAXNCG]; +CHR cindex[MAXNCG]; +int xstate; +int stnum; +CHR match[MAXNCG]; +BYTE extra[NACTIONS]; +CHR *pchar, *pcptr; +int pchlen = TOKENSIZE; +long rcount; +int *verify, *advance, *stoff; +int scon; +CHR *psave; + +Boolean handleeuc = FALSE; +Boolean widecio = FALSE; + +int isArray = 
1; /* XCU4: for %array %pointer */ diff --git a/lex/parser.y b/lex/parser.y @@ -0,0 +1,978 @@ +%{ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +%} +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +%{ +/* from OpenSolaris "parser.y 6.15 05/06/10 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)parser.y 1.8 (gritter) 11/26/05 + */ + +void yyerror(char *); + +#include <ctype.h> +#include <wchar.h> +#include <inttypes.h> +#ifndef __sun +#define wcsetno(c) 0 +#endif + +%} +/* parser.y */ + +/* XCU4: add XSCON: %x exclusive start token */ +/* XCU4: add ARRAY: %a yytext is char array */ +/* XCU4: add POINTER: %p yytext is a pointer to char */ +%token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS XSCON ARRAY POINTER + +%nonassoc ARRAY POINTER +%left XSCON SCON NEWE +%left '/' +/* + * XCU4: lower the precedence of $ and ^ to less than the or operator + * per Spec. 1170 + */ +%left '$' '^' +%left '|' +%left CHAR CCL NCCL '(' '.' STR NULLS +%left ITER +%left CAT +%left '*' '+' '?' 
+ +%{ +#include "ldefs.c" + +#define YYSTYPE union _yystype_ +union _yystype_ +{ + int i; + CHR *cp; +}; +int peekon = 0; /* need this to check if "^" came in a definition section */ + +%} +%% +%{ +int i; +int j,k; +int g; +CHR *p; +static wchar_t L_PctUpT[]= {'%', 'T', 0}; +static wchar_t L_PctLoT[]= {'%', 't', 0}; +static wchar_t L_PctCbr[]= {'%', '}', 0}; +%} +acc : lexinput + ={ +# ifdef DEBUG + if(debug) sect2dump(); +# endif + } + ; +lexinput: defns delim prods end + | defns delim end + ={ + if(!funcflag)phead2(); + funcflag = TRUE; + } + | error + ={ +# ifdef DEBUG + if(debug) { + sect1dump(); + sect2dump(); + } +# endif + fatal = 0; + n_error++; + error("Illegal definition"); + fatal = 1; + } + ; +end: delim | ; +defns: defns STR STR + ={ scopy($2.cp,dp); + def[dptr] = dp; + dp += slength($2.cp) + 1; + scopy($3.cp,dp); + subs[dptr++] = dp; + if(dptr >= DEFSIZE) + error("Too many definitions"); + dp += slength($3.cp) + 1; + if(dp >= dchar+DEFCHAR) + error("Definitions too long"); + subs[dptr]=def[dptr]=0; /* for lookup - require ending null */ + } + | + ; +delim: DELIM + ={ +# ifdef DEBUG + if(sect == DEFSECTION && debug) sect1dump(); +# endif + sect++; + } + ; +prods: prods pr + ={ $$.i = mn2(RNEWE,$1.i,$2.i); + } + | pr + ={ $$.i = $1.i;} + ; +pr: r NEWE + ={ + if(divflg == TRUE) + i = mn1(S1FINAL,casecount); + else i = mn1(FINAL,casecount); + $$.i = mn2(RCAT,$1.i,i); + divflg = FALSE; + if((++casecount)>NACTIONS) + error("Too many (>%d) pattern-action rules.", NACTIONS); + } + | error NEWE + ={ +# ifdef DEBUG + if(debug) sect2dump(); +# endif + fatal = 0; + yyline--; + n_error++; + error("Illegal rule"); + fatal = 1; + yyline++; + } +r: CHAR + ={ $$.i = mn0($1.i); } + | STR + ={ + p = (CHR *)$1.cp; + i = mn0((unsigned)(*p++)); + while(*p) + i = mn2(RSTR,i,(unsigned)(*p++)); + $$.i = i; + } + | '.' 
+ ={ + $$.i = mn0(DOT); + } + | CCL + ={ $$.i = mn1(RCCL,(intptr_t)$1.cp); } + | NCCL + ={ $$.i = mn1(RNCCL,(intptr_t)$1.cp); } + | r '*' + ={ $$.i = mn1(STAR,$1.i); } + | r '+' + ={ $$.i = mn1(PLUS,$1.i); } + | r '?' + ={ $$.i = mn1(QUEST,$1.i); } + | r '|' r + ={ $$.i = mn2(BAR,$1.i,$3.i); } + | r r %prec CAT + ={ $$.i = mn2(RCAT,$1.i,$2.i); } + | r '/' r + ={ if(!divflg){ + j = mn1(S2FINAL,-casecount); + i = mn2(RCAT,$1.i,j); + $$.i = mn2(DIV,i,$3.i); + } + else { + $$.i = mn2(RCAT,$1.i,$3.i); + error("illegal extra slash"); + } + divflg = TRUE; + } + | r ITER ',' ITER '}' + ={ if($2.i > $4.i){ + i = $2.i; + $2.i = $4.i; + $4.i = i; + } + if($4.i <= 0) + error("iteration range must be positive"); + else { + j = $1.i; + for(k = 2; k<=$2.i;k++) + j = mn2(RCAT,j,dupl($1.i)); + for(i = $2.i+1; i<=$4.i; i++){ + g = dupl($1.i); + for(k=2;k<=i;k++) + g = mn2(RCAT,g,dupl($1.i)); + j = mn2(BAR,j,g); + } + $$.i = j; + } + } + | r ITER '}' + ={ + if($2.i < 0)error("can't have negative iteration"); + else if($2.i == 0) $$.i = mn0(RNULLS); + else { + j = $1.i; + for(k=2;k<=$2.i;k++) + j = mn2(RCAT,j,dupl($1.i)); + $$.i = j; + } + } + | r ITER ',' '}' + ={ + /* from n to infinity */ + if($2.i < 0)error("can't have negative iteration"); + else if($2.i == 0) $$.i = mn1(STAR,$1.i); + else if($2.i == 1)$$.i = mn1(PLUS,$1.i); + else { /* >= 2 iterations minimum */ + j = $1.i; + for(k=2;k<$2.i;k++) + j = mn2(RCAT,j,dupl($1.i)); + k = mn1(PLUS,dupl($1.i)); + $$.i = mn2(RCAT,j,k); + } + } + | SCON r + ={ $$.i = mn2(RSCON,$2.i,(intptr_t)$1.cp); } + + /* XCU4: add XSCON */ + | XSCON r + ={ $$.i = mn2(RXSCON,$2.i,(intptr_t)$1.cp); } + | '^' r + ={ $$.i = mn1(CARAT,$2.i); } + | r '$' + ={ i = mn0('\n'); + if(!divflg){ + j = mn1(S2FINAL,-casecount); + k = mn2(RCAT,$1.i,j); + $$.i = mn2(DIV,k,i); + } + else $$.i = mn2(RCAT,$1.i,i); + divflg = TRUE; + } + | '(' r ')' + ={ $$.i = $2.i; } + | NULLS + ={ $$.i = mn0(RNULLS); } + + /* XCU4: add ARRAY and POINTER */ + | ARRAY + ={ isArray = 1; }; 
+ | POINTER + ={ isArray = 0; }; + ; + +%% +int +yylex(void) +{ + CHR *p; + int i; + CHR *xp; + int lex_startcond_lookupval; + CHR *t, c; + int n, j = 0, k, x; + CHR ch; + static int sectbegin; + static CHR token[TOKENSIZE]; + static int iter; + int ccs; /* Current CodeSet. */ + CHR *ccp; + int exclusive_flag; /* XCU4: exclusive start flag */ + +# ifdef DEBUG + yylval.i = 0; +# endif + + if(sect == DEFSECTION) { /* definitions section */ + while(!eof) { + if(prev == '\n'){ /* next char is at beginning of line */ + getl(p=buf); + switch(*p){ + case '%': + switch(c= *(p+1)){ + case '%': + if(scomp(p, (CHR *)"%%")) { + p++; + while(*(++p)) + if(!space(*p)) { + warning("invalid string following %%%% be ignored"); + break; + } + } + lgate(); + if(!ratfor)fprintf(fout,"# "); + fprintf(fout,"define YYNEWLINE %d\n",ctable['\n']); + if(!ratfor) { + fprintf(fout,"int yylex(){\nint nstr = 0; extern int yyprevious;\n"); + } + sectbegin = TRUE; + i = treesize*(sizeof(*name)+sizeof(*left)+ + sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA; + p = myalloc(i,1); + if(p == NULL) + error("Too little core for parse tree"); + free(p); + name = myalloc(treesize,sizeof(*name)); + left = myalloc(treesize,sizeof(*left)); + right = myalloc(treesize,sizeof(*right)); + nullstr = myalloc(treesize,sizeof(*nullstr)); + parent = myalloc(treesize,sizeof(*parent)); + if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0) + error("Too little core for parse tree"); + return(freturn(DELIM)); + case 'p': case 'P': + /* %p or %pointer */ + if ((*(p+2) == 'o') || + (*(p+2) == 'O')) { + if(lgatflg) + error("Too late for %%pointer"); + while(*p && !iswspace(*p)) + p++; + isArray = 0; + continue; + } + /* has overridden number of positions */ + p += 2; + maxpos = siconv(p); + if (maxpos<=0)error("illegal position number"); +# ifdef DEBUG + if (debug) printf("positions (%%p) now %d\n",maxpos); +# endif + if(report == 2)report = 1; + continue; + case 'n': case 'N': /* has 
overridden number of states */ + p += 2; + nstates = siconv(p); + if(nstates<=0)error("illegal state number"); +# ifdef DEBUG + if(debug)printf( " no. states (%%n) now %d\n",nstates); +# endif + if(report == 2)report = 1; + continue; + case 'e': case 'E': /* has overridden number of tree nodes */ + p += 2; + treesize = siconv(p); + if(treesize<=0)error("illegal number of parse tree nodes"); +# ifdef DEBUG + if (debug) printf("treesize (%%e) now %d\n",treesize); +# endif + if(report == 2)report = 1; + continue; + case 'o': case 'O': + p += 2; + outsize = siconv(p); + if(outsize<=0)error("illegal size of output array"); + if (report ==2) report=1; + continue; + case 'a': case 'A': + /* %a or %array */ + if ((*(p+2) == 'r') || + (*(p+2) == 'R')) { + if(lgatflg) + error("Too late for %%array"); + while(*p && !iswspace(*p)) + p++; + isArray = 1; + continue; + } + /* has overridden number of transitions */ + p += 2; + ntrans = siconv(p); + if(ntrans<=0)error("illegal translation number"); +# ifdef DEBUG + if (debug)printf("N. 
trans (%%a) now %d\n",ntrans); +# endif + if(report == 2)report = 1; + continue; + case 'k': case 'K': /* overriden packed char classes */ + p += 2; + free(pchar); + pchlen = siconv(p); + if(pchlen<=0)error("illegal number of packed character class"); +# ifdef DEBUG + if (debug) printf( "Size classes (%%k) now %d\n",pchlen); +# endif + pchar=pcptr=myalloc(pchlen, sizeof(*pchar)); + if (report==2) report=1; + continue; + case 't': case 'T': /* character set specifier */ + if(handleeuc) + error("\ +Character table (%t) is supported only in ASCII compatibility mode.\n"); + ZCH = wcstol(p+2, NULL, 10); + if (ZCH < NCH) ZCH = NCH; + if (ZCH > 2*NCH) error("ch table needs redeclaration"); + chset = TRUE; + for(i = 0; i<ZCH; i++) + ctable[i] = 0; + while(getl(p) && scomp(p,L_PctUpT) != 0 && scomp(p,L_PctLoT) != 0){ + if((n = siconv(p)) <= 0 || n > ZCH){ + error("Character value %d out of range",n); + continue; + } + while(digit(*p)) p++; + if(!iswspace(*p)) error("bad translation format"); + while(iswspace(*p)) p++; + t = p; + while(*t){ + c = ctrans(&t); + if(ctable[(unsigned)c]){ + if (iswprint(c)) + warning("Character '%lc' used twice",c); + + else + error("Chararter %o used twice",c); + } + else ctable[(unsigned)c] = n; + t++; + } + p = buf; + } + { + char chused[2*NCH]; int kr; + for(i=0; i<ZCH; i++) + chused[i]=0; + for(i=0; i<NCH; i++) + chused[ctable[i]]=1; + for(kr=i=1; i<NCH; i++) + if (ctable[i]==0) + { + while (chused[kr] == 0) + kr++; + ctable[i]=kr; + chused[kr]=1; + } + } + lgate(); + continue; + case 'r': case 'R': + c = 'r'; + /* FALLTHRU */ + case 'c': case 'C': + if(lgatflg) + error("Too late for language specifier"); + ratfor = (c == 'r'); + continue; + case '{': + lgate(); + while(getl(p) && scomp(p, L_PctCbr) != 0) + if(p[0]=='/' && p[1]=='*') + cpycom(p); + else + fprintf(fout,"%ls\n",p); + if(p[0] == '%') continue; + if (*p) error("EOF before %%%%"); + else error("EOF before %%}"); + break; + + case 'x': case 'X': /* XCU4: exclusive start 
conditions */ + exclusive_flag = 1; + goto start; + + case 's': case 'S': /* start conditions */ + exclusive_flag = 0; +start: + lgate(); + + while(*p && !iswspace(*p) && ((*p) != (wchar_t)',')) p++; + n = TRUE; + while(n){ + while(*p && (iswspace(*p) || ((*p) == (wchar_t)','))) p++; + t = p; + while(*p && !iswspace(*p) && ((*p) != (wchar_t)',')) { + if(!isascii(*p)) + error("None-ASCII characters in start condition."); + p++; + } + if(!*p) n = FALSE; + *p++ = 0; + if (*t == 0) continue; + i = sptr*2; + if(!ratfor)fprintf(fout,"# "); + fprintf(fout,"define %ls %d\n",t,i); + scopy(t,sp); + sname[sptr] = sp; + /* XCU4: save exclusive flag with start name */ + exclusive[sptr++] = exclusive_flag; + sname[sptr] = 0; /* required by lookup */ + if(sptr >= STARTSIZE) + error("Too many start conditions"); + sp += slength(sp) + 1; + if(sp >= schar+STARTCHAR) + error("Start conditions too long"); + } + continue; + default: + error("Invalid request %s",p); + continue; + } /* end of switch after seeing '%' */ + break; + case ' ': case '\t': /* must be code */ + lgate(); + if( p[1]=='/' && p[2]=='*' ) cpycom(p); + else fprintf(fout, "%ls\n",p); + continue; + case '/': /* look for comments */ + lgate(); + if((*(p+1))=='*') cpycom(p); + /* FALLTHRU */ + default: /* definition */ + while(*p && !iswspace(*p)) p++; + if(*p == 0) + continue; + prev = *p; + *p = 0; + bptr = p+1; + yylval.cp = (CHR *)buf; + if(digit(buf[0])) + warning("Substitution strings may not begin with digits"); + return(freturn(STR)); + } + } else { /* still sect 1, but prev != '\n' */ + p = bptr; + while(*p && iswspace(*p)) p++; + if(*p == 0) + warning("No translation given - null string assumed"); + scopy(p,token); + yylval.cp = (CHR *)token; + prev = '\n'; + return(freturn(STR)); + } + } + error("unexpected EOF before %%%%"); + /* end of section one processing */ + } else if(sect == RULESECTION){ /* rules and actions */ + lgate(); + while(!eof){ + static int first_test=TRUE, first_value; + static int 
reverse=FALSE; + switch(c=gch()){ + case '\0': + if(n_error)error_tail(); + return(freturn(0)); + case '\n': + if(prev == '\n') continue; + x = NEWE; + break; + case ' ': + case '\t': + if(prev == '\n') copy_line = TRUE; + if(sectbegin == TRUE){ + cpyact(); + copy_line = FALSE; + while((c=gch()) && c != '\n'); + continue; + } + if(!funcflag)phead2(); + funcflag = TRUE; + if(ratfor)fprintf(fout,"%d\n",30000+casecount); + else fprintf(fout,"case %d:\n",casecount); + if(cpyact()){ + if(ratfor)fprintf(fout,"goto 30997\n"); + else fprintf(fout,"break;\n"); + } + while((c=gch()) && c != '\n') { + if (c=='/') { + if((c=gch())=='*') { + c=gch(); + while(c !=EOF) { + while (c=='*') + if ((c=gch()) == '/') goto w_loop; + c = gch(); + } + error("EOF inside comment"); + } else + warning("undefined string"); + } else if (c=='}') + error("illegal extra \"}\""); + w_loop: ; + } + /* while ((c=gch())== ' ' || c == '\t') ; */ + /* if (!space(c)) error("undefined action string"); */ + if(peek == ' ' || peek == '\t' || sectbegin == TRUE){ + fatal = 0; + n_error++; + error("executable statements should occur right after %%%%"); + fatal = 1; + continue; + } + x = NEWE; + break; + case '%': + if(prev != '\n') goto character; + if(peek == '{'){ /* included code */ + getl(buf); + while(!eof&& getl(buf) && scomp(L_PctCbr,buf)!=0) + if(buf[0]=='/' && buf[1]=='*') + cpycom(buf); + else + fprintf(fout,"%ls\n",buf); + continue; + } + if(peek == '%'){ + c = gch(); + c = gch(); + x = DELIM; + break; + } + goto character; + case '|': + if(peek == ' ' || peek == '\t' || peek == '\n'){ + if(ratfor)fprintf(fout,"%d\n",30000+casecount++); + else fprintf(fout,"case %d:\n",casecount++); + continue; + } + x = '|'; + break; + case '$': + if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){ + x = c; + break; + } + goto character; + case '^': + if(peekon && (prev == '}')){ + x = c; + break; + } + if(prev != '\n' && scon != TRUE) goto character; + /* valid only at line begin */ + x 
= c; + break; + case '?': + case '+': + case '*': + if(prev == '\n' ) { + fatal = 0; + n_error++; + error("illegal operator -- %c",c); + fatal = 1; + } + /* FALLTHRU */ + case '.': + case '(': + case ')': + case ',': + case '/': + x = c; + break; + case '}': + iter = FALSE; + x = c; + break; + case '{': /* either iteration or definition */ + if(digit(c=gch())){ /* iteration */ + iter = TRUE; + if(prev=='{') first_test = TRUE; + ieval: + i = 0; + while(digit(c)){ + token[i++] = c; + c = gch(); + } + token[i] = 0; + yylval.i = siconv(token); + if(first_test) { + first_test = FALSE; + first_value = yylval.i; + } else + if(first_value>yylval.i)warning("the values between braces are reversed"); + ch = c; + munput('c',&ch); + x = ITER; + break; + } + else { /* definition */ + i = 0; + while(c && c!='}'){ + token[i++] = c; + if(i >= TOKENSIZE) + error("definition too long"); + c = gch(); + } + token[i] = 0; + i = lookup(token,def); + if(i < 0) + error("definition %ls not found",token); + else + munput('s',(CHR *)(subs[i])); + if (peek == '^') + peekon = 1; + continue; + } + case '<': /* start condition ? 
*/ + if(prev != '\n') /* not at line begin, not start */ + goto character; + t = slptr; + do { + i = 0; + if(!isascii(c = gch())) + error("Non-ASCII characters in start condition."); + while(c != ',' && c && c != '>'){ + token[i++] = c; + if(i >= TOKENSIZE) + error("string name too long"); + if(!isascii(c = gch())) + error("None-ASCII characters in start condition."); + } + token[i] = 0; + if(i == 0) + goto character; + i = lookup(token,sname); + lex_startcond_lookupval = i; + if(i < 0) { + fatal = 0; + n_error++; + error("undefined start condition %ls",token); + fatal = 1; + continue; + } + *slptr++ = i+1; + } while(c && c != '>'); + *slptr++ = 0; + /* check if previous value re-usable */ + for (xp=slist; xp<t; ) + { + if (scomp(xp, t)==0) + break; + while (*xp++); + } + if (xp<t) + { + /* re-use previous pointer to string */ + slptr=t; + t=xp; + } + if(slptr > slist+STARTSIZE) /* note not packed */ + error("Too many start conditions used"); + yylval.cp = (CHR *)t; + + /* XCU4: add XSCON */ + + if (exclusive[lex_startcond_lookupval]) + x = XSCON; + else + x = SCON; + break; + case '"': + i = 0; + while((c=gch()) && c != '"' && c != '\n'){ + if(c == '\\') c = usescape(c=gch()); + remch(c); + token[i++] = c; + if(i >= TOKENSIZE){ + warning("String too long"); + i = TOKENSIZE-1; + break; + } + } + if(c == '\n') { + yyline--; + warning("Non-terminated string"); + yyline++; + } + token[i] = 0; + if(i == 0)x = NULLS; + else if(i == 1){ + yylval.i = (unsigned)token[0]; + x = CHAR; + } + else { + yylval.cp = (CHR *)token; + x = STR; + } + break; + case '[': + reverse = FALSE; + x = CCL; + if((c = gch()) == '^'){ + x = NCCL; + reverse = TRUE; + c = gch(); + } + i = 0; + while(c != ']' && c){ + static int light=TRUE, ESCAPE=FALSE; + if(c == '-' && prev == '^' && reverse){ + symbol[(unsigned)c] = 1; + c = gch(); + continue; + } + if(c == '\\') { + c = usescape(c=gch()); + ESCAPE = TRUE; + } + if(c=='-' && !ESCAPE && prev!='[' && peek!=']'){ + /* range specified */ + if 
(light) { + c = gch(); + if(c == '\\') + c=usescape(c=gch()); + remch(c); + k = c; + ccs=wcsetno(k); + if(wcsetno(j)!=ccs) + error("\ +Character range specified between different codesets."); + if((unsigned)j > (unsigned)k) { + n = j; + j = k; + k = n; + } + if(!handleeuc) + if(!(('A'<=j && k<='Z') || + ('a'<=j && k<='z') || + ('0'<=j && k<='9'))) + warning("Non-portable Character Class"); + token[i++] = RANGE; + token[i++] = j; + token[i++] = k; + light = FALSE; + } else { + error("unmatched hyphen"); + if(symbol[(unsigned)c])warning("\"%c\" redefined inside brackets",c); + else symbol[(unsigned)c] = 1; + } + ESCAPE = FALSE; + } else { + j = c; + remch(c); + token[i++] = c; /* Remember whatever.*/ + light = TRUE; + ESCAPE = FALSE; + } + c = gch(); + } + /* try to pack ccl's */ + + token[i] = 0; + ccp = ccl; + while (ccp < ccptr && scomp(token, ccp) != 0) ccp++; + if (ccp < ccptr) { /* found in ccl */ + yylval.cp = ccp; + } else { /* not in ccl, add it */ + scopy(token,ccptr); + yylval.cp = ccptr; + ccptr += slength(token) + 1; + if(ccptr >= ccl+CCLSIZE) + error("Too many large character classes"); + } + break; + case '\\': + c = usescape(c=gch()); + default: + character: + if(iter){ /* second part of an iteration */ + iter = FALSE; + if('0' <= c && c <= '9') + goto ieval; + } + remch(c); + if(alpha(peek)){ + i = 0; + yylval.cp = (CHR *)token; + token[i++] = c; + while(alpha(peek)) { + remch(token[i++] = gch()); + if(i >= TOKENSIZE) { + warning("string too long"); + i = TOKENSIZE - 1; + break; + } + } + if(peek == '?' 
|| peek == '*' || peek == '+') + munput('c',&token[--i]); + token[i] = 0; + if(i == 1){ + yylval.i = (unsigned)(token[0]); + x = CHAR; + } + else x = STR; + } + else { + yylval.i = (unsigned)c; + x = CHAR; + } + } + scon = FALSE; + peekon = 0; + if((x == SCON) || (x == XSCON)) + scon = TRUE; + sectbegin = FALSE; + return(freturn(x)); + /* NOTREACHED */ + } + } + /* section three */ + lgate(); + ptail(); +# ifdef DEBUG + if(debug) + fprintf(fout,"\n/*this comes from section three - debug */\n"); +# endif + + if(getl(buf) && !eof) { + if (sargv[optind] == NULL) + fprintf(fout, "\n# line %d\n", yyline-1); + else + fprintf(fout, + "\n# line %d \"%s\"\n", yyline-1, sargv[optind]); + fprintf(fout,"%ls\n",buf); + while(getl(buf) && !eof) + fprintf(fout,"%ls\n",buf); + } + + return(freturn(0)); + } +/* end of yylex */ +# ifdef DEBUG +freturn(i) + int i; { + if(yydebug) { + printf("now return "); + if((unsigned)i < NCH) allprint(i); + else printf("%d",i); + printf(" yylval = "); + switch(i){ + case STR: case CCL: case NCCL: + strpt(yylval.cp); + break; + case CHAR: + allprint(yylval.i); + break; + default: + printf("%d",yylval.i); + break; + } + putchar('\n'); + } + return(i); + } +# endif diff --git a/lex/reject.c b/lex/reject.c @@ -0,0 +1,158 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1989 AT&T */ +/* All Rights Reserved */ + + +/* from OpenSolaris "reject.c 6.10 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)reject.c 1.4 (gritter) 11/27/05 + */ + +#include <stdio.h> + +#ifdef EUC +#ifdef __sun +#include <euc.h> +#include <widec.h> +#else /* !sun */ +#include <wchar.h> +#endif /* !sun */ +#include <limits.h> +#endif + + +#ifndef JLSLEX + +#define CHR char +#define YYTEXT yytext +#define YYLENG yyleng +#define YYINPUT yyinput +#define YYUNPUT yyunput +#define YYOUTPUT yyoutput +#define YYREJECT yyreject +#endif + +#ifdef WOPTION + +#define CHR wchar_t +#define YYTEXT yytext +#define YYLENG yyleng +#define YYINPUT yyinput +#define YYUNPUT yyunput +#define YYOUTPUT yyoutput +#define YYREJECT yyreject_w +#endif + +#ifdef EOPTION + +#define CHR wchar_t +#define YYTEXT yywtext +#define YYLENG yywleng +#define YYINPUT yywinput +#define YYUNPUT yywunput +#define YYOUTPUT yywoutput +#define YYREJECT yyreject_e +extern unsigned char yytext[]; +extern int yyleng; +#endif + +#if defined(__cplusplus) || defined(__STDC__) +extern int yyback(int *, int); +extern int YYINPUT(void); +extern void YYUNPUT(int); +#ifdef EUC + static int yyracc(int); +#else + extern int yyracc(int); +#endif +#ifdef EOPTION + extern size_t wcstombs(char *, const wchar_t *, size_t); +#endif +#endif + +extern FILE *yyout, *yyin; + +extern int yyprevious, *yyfnd; + +extern char yyextra[]; + +extern int YYLENG; +extern CHR YYTEXT[]; + +extern struct {int *yyaa, *yybb; int *yystops; } *yylstate[], **yylsp, **yyolsp; +#if defined(__cplusplus) || defined(__STDC__) +int +YYREJECT(void) +#else +YYREJECT() +#endif +{ + for (; yylsp < yyolsp; yylsp++) + YYTEXT[YYLENG++] = 
YYINPUT(); + if (*yyfnd > 0) + return (yyracc(*yyfnd++)); + while (yylsp-- > yylstate) { + YYUNPUT(YYTEXT[YYLENG-1]); + YYTEXT[--YYLENG] = 0; + if (*yylsp != 0 && (yyfnd = (*yylsp)->yystops) && *yyfnd > 0) + return (yyracc(*yyfnd++)); + } +#ifdef EOPTION + yyleng = wcstombs((char *)yytext, YYTEXT, YYLENG*MB_LEN_MAX); +#endif + if (YYTEXT[0] == 0) + return (0); + YYLENG = 0; +#ifdef EOPTION + yyleng = 0; +#endif + return (-1); +} + +#ifdef EUC +static +#endif +#if defined(__cplusplus) || defined(__STDC__) +int +yyracc(int m) +#else +yyracc(m) +#endif +{ + yyolsp = yylsp; + if (yyextra[m]) { + while (yyback((*yylsp)->yystops, -m) != 1 && yylsp > yylstate) { + yylsp--; + YYUNPUT(YYTEXT[--YYLENG]); + } + } + yyprevious = YYTEXT[YYLENG-1]; + YYTEXT[YYLENG] = 0; +#ifdef EOPTION + yyleng = wcstombs((char *)yytext, YYTEXT, YYLENG*MB_LEN_MAX); +#endif + return (m); +} diff --git a/lex/search.h b/lex/search.h @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef SEARCH_H +#define SEARCH_H + +/* from OpenSolaris "search.h 1.19 05/06/08 SMI" SVr4.0 1.3.1.11 */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)search.h 1.4 (gritter) 11/26/05 + */ + +#include <sys/types.h> + + +/* + * xlsearch() is only declared in this header; its definition lives elsewhere. + * NOTE(review): the parameter list has the shape of lsearch(3) (key, table base, + * pointer to element count, element width, comparison callback) -- confirm + * against the definition before relying on that. + */ +void *xlsearch(const void *, void *, unsigned *, unsigned, + int (*)(const void *, const void *)); + +#endif /* SEARCH_H */ diff --git a/lex/sgs.h b/lex/sgs.h @@ -0,0 +1,51 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005 Gunnar Ritter, Freiburg i. 
Br., Germany + */ +/* + * Identification strings compiled into the program: sccsid holds the "@(#)" + * tagged version line, pkg the package name and rel the release string. + */ +const char sccsid[] = "@(#)lex.sl 2.13 (gritter) 4/14/07"; +const char pkg[] = "Heirloom Development Tools"; +const char rel[] = "2.13 (gritter) 4/14/07"; +/* SLIST: per-file Sccsid lines of the sources that make up this release */ +/* +allprint.c: * Sccsid @(#)allprint.c 1.4 (gritter) 11/27/05 +getopt.c: * Sccsid @(#)getopt.c 1.9 (gritter) 4/2/07 +header.c: * Sccsid @(#)header.c 1.12 (gritter) 9/23/06 +ldefs.c: * Sccsid @(#)ldefs.c 1.7 (gritter) 4/14/07 +libmain.c: * Sccsid @(#)libmain.c 1.4 (gritter) 11/26/05 +lsearch.c: * Sccsid @(#)lsearch.c 1.4 (gritter) 11/26/05 +main.c: * Sccsid @(#)main.c 1.9 (gritter) 11/26/05 +once.h: * Sccsid @(#)once.h 1.4 (gritter) 11/26/05 +parser.y: * Sccsid @(#)parser.y 1.8 (gritter) 11/26/05 +reject.c: * Sccsid @(#)reject.c 1.4 (gritter) 11/27/05 +search.h: * Sccsid @(#)search.h 1.4 (gritter) 11/26/05 +sub1.c: * Sccsid @(#)sub1.c 1.5 (gritter) 11/26/05 +sub2.c: * Sccsid @(#)sub2.c 1.7 (gritter) 01/12/07 +sub3.c: * Sccsid @(#)sub3.c 1.4 (gritter) 11/26/05 +wcio.c: * Sccsid @(#)wcio.c 1.1 (gritter) 6/25/05 +yyless.c: * Sccsid @(#)yyless.c 1.6 (gritter) 11/27/05 +yywrap.c: * Sccsid @(#)yywrap.c 1.3 (gritter) 6/18/05 +nceucform: * Sccsid @(#)nceucform 1.6 (gritter) 11/18/05 +ncform: * Sccsid @(#)ncform 1.4 (gritter) 11/18/05 +nrform:# Sccsid @(#)nrform 1.4 (gritter) 10/20/06 + +*/ diff --git a/lex/sub1.c b/lex/sub1.c @@ -0,0 +1,1017 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "sub1.c 6.18 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)sub1.c 1.5 (gritter) 11/26/05 + */ + +#include "ldefs.c" +#include <limits.h> +#include <wchar.h> +#include <ctype.h> +#include <stdarg.h> + +/* + * return next line of input, throw away trailing '\n' + * and also throw away trailing blanks (spaces and tabs) + * returns 0 if eof is had immediately + */ + +CHR * +getl(CHR *p) +{ + int c; + CHR *s, *t, *u = NULL; + int blank = 0; + + t = s = p; + while (((c = gch()) != 0) && c != '\n') { + if (t >= &p[BUF_SIZ]) + error("definitions too long"); + if (c == ' ' || c == '\t') { + if (!blank) { + blank = 1; + u = t; + } + } else + blank = 0; + + *t++ = c; + } + if (blank) + *u = 0; + else + *t = 0; + + if (c == 0 && s == t) + return ((CHR *) 0); + prev = '\n'; + pres = '\n'; + return (s); +} + +int +space(int ch) +{ + switch (ch) { + case ' ': + case '\t': + case '\n': + return (1); + } + return (0); +} + +int +digit(int c) +{ + return (c >= '0' && c <= '9'); +} + +/* VARARGS1 */ +void +error(const char *s, ...) +{ + va_list ap; + + /* if(!eof) */ + if (!yyline) + fprintf(errorf, "Command line: "); + else { + fprintf(errorf, !no_input ? 
"" : "\"%s\":", sargv[optind]); + fprintf(errorf, "line %d: ", yyline); + } + fprintf(errorf, "Error: "); + va_start(ap, s); + vfprintf(errorf, s, ap); + va_end(ap); + putc('\n', errorf); + if (fatal) + error_tail(); +} + +void +error_tail(void) +{ +#ifdef DEBUG + if (debug && sect != ENDSECTION) { + sect1dump(); + sect2dump(); + } +#endif + + if (report == 1) + statistics(); + exit(1); + /* NOTREACHED */ +} + +/* VARARGS1 */ +void +warning(const char *s, ...) +{ + va_list ap; + + if (!eof) + if (!yyline) + fprintf(errorf, "Command line: "); + else { + fprintf(errorf, !no_input?"":"\"%s\":", sargv[optind]); + fprintf(errorf, "line %d: ", yyline); + } + fprintf(errorf, "Warning: "); + va_start(ap, s); + vfprintf(errorf, s, ap); + va_end(ap); + putc('\n', errorf); + fflush(errorf); + if (fout) + fflush(fout); + fflush(stdout); +} + +int +index(int a, CHR *s) +{ + int k; + for (k = 0; s[k]; k++) + if (s[k] == a) + return (k); + return (-1); +} + +int +alpha(int c) +{ + return ('a' <= c && c <= 'z' || + 'A' <= c && c <= 'Z'); +} + +int +printable(int c) +{ + return (c > 040 && c < 0177); +} + +void +lgate(void) +{ + char fname[20]; + + if (lgatflg) + return; + lgatflg = 1; + if (fout == NULL) { + sprintf(fname, "lex.yy.%c", ratfor ? 'r' : 'c'); + fout = fopen(fname, "w"); + } + if (fout == NULL) + error("Can't open %s", fname); + if (ratfor) + fprintf(fout, "#\n"); + phead1(); +} + +/* + * scopy(ptr to str, ptr to str) - copy first arg str to second + * returns ptr to second arg + */ +void +scopy(CHR *s, CHR *t) +{ + CHR *i; + i = t; + while (*i++ = *s++); +} + +/* + * convert string t, return integer value + */ +int +siconv(CHR *t) +{ + int i, sw; + CHR *s; + s = t; + while (space(*s)) + s++; + if (!digit(*s) && *s != '-') + error("missing translation value"); + sw = 0; + if (*s == '-') { + sw = 1; + s++; + } + if (!digit(*s)) + error("incomplete translation format"); + i = 0; + while ('0' <= *s && *s <= '9') + i = i * 10 + (*(s++)-'0'); + return (sw ? 
-i : i); +} + +/* + * slength(ptr to str) - return integer length of string arg + * excludes '\0' terminator + */ +int +slength(CHR *s) +{ + int n; + CHR *t; + t = s; + for (n = 0; *t++; n++); + return (n); +} + +/* + * scomp(x,y) - return -1 if x < y, + * 0 if x == y, + * return 1 if x > y, all lexicographically + */ +int +scomp(CHR *x, CHR *y) +{ + CHR *a, *d; + a = (CHR *) x; + d = (CHR *) y; + while (*a || *d) { + if (*a > *d) + return (1); + if (*a < *d) + return (-1); + a++; + d++; + } + return (0); +} + +int +ctrans(CHR **ss) +{ + int c, k; + if ((c = **ss) != '\\') + return (c); + switch (c = *++*ss) { + case 'a': + c = '\a'; + warning("\\a is ANSI C \"alert\" character"); + break; + case 'v': c = '\v'; break; + case 'n': c = '\n'; break; + case 't': c = '\t'; break; + case 'r': c = '\r'; break; + case 'b': c = '\b'; break; + case 'f': c = 014; break; /* form feed for ascii */ + case '\\': c = '\\'; break; + case 'x': { + int dd; + warning("\\x is ANSI C hex escape"); + if (digit((dd = *++*ss)) || + ('a' <= dd && dd <= 'f') || + ('A' <= dd && dd <= 'F')) { + c = 0; + while (digit(dd) || + ('A' <= dd && dd <= 'F') || + ('a' <= dd && dd <= 'f')) { + if (digit(dd)) + c = c*16 + dd - '0'; + else if (dd >= 'a') + c = c*16 + 10 + dd - 'a'; + else + c = c*16 + 10 + dd - 'A'; + dd = *++*ss; + } + } else + c = 'x'; + break; + } + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + c -= '0'; + while ((k = *(*ss+1)) >= '0' && k <= '7') { + c = c*8 + k - '0'; + (*ss)++; + } + break; + } + return (c); +} + +void +cclinter(int sw) +{ + /* sw = 1 ==> ccl */ + int i, j, k; + int m; + if (!sw) { /* is NCCL */ + for (i = 1; i < ncg; i++) + symbol[i] ^= 1; /* reverse value */ + } + for (i = 1; i < ncg; i++) + if (symbol[i]) + break; + if (i >= ncg) + return; + i = cindex[i]; + /* see if ccl is already in our table */ + j = 0; + if (i) { + for (j = 1; j < ncg; j++) { + if ((symbol[j] && cindex[j] != i) || + (!symbol[j] && cindex[j] == i)) + 
break; + } + } + if (j >= ncg) + return; /* already in */ + m = 0; + k = 0; + for (i = 1; i < ncg; i++) { + if (symbol[i]) { + if (!cindex[i]) { + cindex[i] = ccount; + symbol[i] = 0; + m = 1; + } else + k = 1; + } + } + /* m == 1 implies last value of ccount has been used */ + if (m) + ccount++; + if (k == 0) + return; /* is now in as ccount wholly */ + /* intersection must be computed */ + for (i = 1; i < ncg; i++) { + if (symbol[i]) { + m = 0; + j = cindex[i]; /* will be non-zero */ + for (k = 1; k < ncg; k++) { + if (cindex[k] == j) { + if (symbol[k]) + symbol[k] = 0; + else { + cindex[k] = ccount; + m = 1; + } + } + } + if (m) + ccount++; + } + } +} + +int +usescape(int c) +{ + char d; + switch (c) { + case 'a': + c = '\a'; + warning("\\a is ANSI C \"alert\" character"); break; + case 'v': c = '\v'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'b': c = '\b'; break; + case 'f': c = 014; break; /* form feed for ascii */ + case 'x': { + int dd; + if (digit((dd = gch())) || + ('A' <= dd && dd <= 'F') || + ('a' <= dd && dd <= 'f')) { + c = 0; + while (digit(dd) || + ('A' <= dd && dd <= 'F') || + ('a' <= dd && dd <= 'f')) { + if (digit(dd)) + c = c*16 + dd - '0'; + else if (dd >= 'a') + c = c*16 + 10 + dd - 'a'; + else + c = c*16 + 10 + dd - 'A'; + if (!digit(peek) && + !('A' <= peek && peek <= 'F') && + !('a' <= peek && peek <= 'f')) + break; + dd = gch(); + } + + } else + c = 'x'; + break; + } + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + c -= '0'; + while ('0' <= (d = gch()) && d <= '7') { + c = c * 8 + (d-'0'); + if (!('0' <= peek && peek <= '7')) break; + } + + break; + } + + if (handleeuc && !isascii(c)) { + char tmpchar = c & 0x00ff; + wchar_t wc; + mbtowc(&wc, &tmpchar, sizeof (tmpchar)); + c = wc; + } + return (c); +} + +int +lookup(CHR *s, CHR **t) +{ + int i; + i = 0; + while (*t) { + if (scomp(s, *t) == 0) + return (i); + i++; + t++; + } + return (-1); +} + 
+void +cpycom(CHR *p) +{ + static CHR *t; + static int c; + t = p; + + if (sargv[optind] == NULL) + fprintf(fout, "\n# line %d\n", yyline); + else + fprintf(fout, "\n# line %d \"%s\"\n", yyline, sargv[optind]); + + putc(*t, fout), t++; + putc(*t, fout), t++; + while (*t) { + while (*t == '*') { + putc(*t, fout), t++; + if (*t == '/') + goto backcall; + } + /* + * FIX BUG #1058428, not parsing comments correctly + * that span more than one line + */ + if (*t != '\0') + putc(*t, fout), t++; + } + putc('\n', fout); + while (c = gch()) { + while (c == '*') { + putc(c, fout); + if ((c = gch()) == '/') { + while ((c = gch()) == ' ' || c == '\t'); + if (!space(c)) + error("unacceptable statement"); + prev = '\n'; + goto backcall; + } + } + putc(c, fout); + } + error("unexpected EOF inside comment"); +backcall: + putc('/', fout); + putc('\n', fout); +} + +/* + * copy C action to the next ; or closing + */ +int +cpyact(void) +{ + int brac, c, mth; + static int sw, savline; + + brac = 0; + sw = TRUE; + savline = yyline; + + if (sargv[optind] == NULL) + fprintf(fout, "\n# line %d\n", yyline); + else + fprintf(fout, "\n# line %d \"%s\"\n", yyline, sargv[optind]); + + while (!eof) { + c = gch(); + swt: + switch (c) { + case '|': + if (brac == 0 && sw == TRUE) { + if (peek == '|') + gch(); /* eat up an extra '|' */ + return (0); + } + break; + case ';': + if (brac == 0) { + putwc(c, fout); + putc('\n', fout); + return (1); + } + break; + case '{': + brac++; + savline = yyline; + break; + case '}': + brac--; + if (brac == 0) { + putwc(c, fout); + putc('\n', fout); + return (1); + } + break; + case '/': + putwc(c, fout); + c = gch(); + if (c != '*') + goto swt; + putwc(c, fout); + savline = yyline; + while (c = gch()) { + while (c == '*') { + putwc(c, fout); + if ((c = gch()) == '/') { + putc('/', fout); + while ((c = gch()) == ' ' || + c == '\t' || c == '\n') + putwc(c, fout); + goto swt; + } + } + putc(c, fout); + } + yyline = savline; + error("EOF inside comment"); + /* 
NOTREACHED */ + break; + case '\'': /* character constant */ + case '"': /* character string */ + mth = c; + putwc(c, fout); + while (c = gch()) { + if (c == '\\') { + putwc(c, fout); + c = gch(); + } + else + if (c == mth) + goto loop; + putwc(c, fout); + if (c == '\n') { + yyline--; + error( +"Non-terminated string or character constant"); + } + } + error("EOF in string or character constant"); + /* NOTREACHED */ + break; + case '\0': + yyline = savline; + error("Action does not terminate"); + /* NOTREACHED */ + break; + default: + break; /* usual character */ + } + loop: + if (c != ' ' && c != '\t' && c != '\n') + sw = FALSE; + putwc(c, fout); + if (peek == '\n' && !brac && copy_line) { + putc('\n', fout); + return (1); + } + } + error("Premature EOF"); + return (0); +} + +int +gch(void) +{ + int c; + prev = pres; + c = pres = peek; + peek = pushptr > pushc ? *--pushptr : getwc(fin); + while (peek == EOF) { + if (no_input) { + if (!yyline) + error("Cannot read from -- %s", + sargv[optind]); + if (optind < sargc-1) { + yyline = 0; + if (fin != stdin) + fclose(fin); + fin = fopen(sargv[++optind], "r"); + if (fin == NULL) + error("Cannot open file -- %s", + sargv[optind]); + peek = getwc(fin); + } else + break; + } else { + if (fin != stdin) + fclose(fin); + if (!yyline) + error("Cannot read from -- standard input"); + else + break; + } + } + if (c == EOF) { + eof = TRUE; + return (0); + } + if (c == '\n') + yyline++; + return (c); +} + +int +mn2(int a, intptr_t d, intptr_t c) +{ + if (tptr >= treesize) { + tptr++; + error("Parse tree too big %s", + (treesize == TREESIZE ? 
"\nTry using %e num" : "")); + } + if (d >= treesize) { + error("Parse error"); + } + name[tptr] = a; + left[tptr] = d; + right[tptr] = c; + parent[tptr] = 0; + nullstr[tptr] = 0; + switch (a) { + case RSTR: + parent[d] = tptr; + break; + case BAR: + case RNEWE: + if (nullstr[d] || nullstr[c]) + nullstr[tptr] = TRUE; + parent[d] = parent[c] = tptr; + break; + case RCAT: + case DIV: + if (nullstr[d] && nullstr[c]) + nullstr[tptr] = TRUE; + parent[d] = parent[c] = tptr; + break; + /* XCU4: add RXSCON */ + case RXSCON: + case RSCON: + parent[d] = tptr; + nullstr[tptr] = nullstr[d]; + break; +#ifdef DEBUG + default: + warning("bad switch mn2 %d %d", a, d); + break; +#endif + } + return (tptr++); +} + +int +mn1(int a, intptr_t d) +{ + if (tptr >= treesize) { + tptr++; + error("Parse tree too big %s", + (treesize == TREESIZE ? "\nTry using %e num" : "")); + } + name[tptr] = a; + left[tptr] = d; + parent[tptr] = 0; + nullstr[tptr] = 0; + switch (a) { + case RCCL: + case RNCCL: + if (slength((CHR *)d) == 0) + nullstr[tptr] = TRUE; + break; + case STAR: + case QUEST: + nullstr[tptr] = TRUE; + parent[d] = tptr; + break; + case PLUS: + case CARAT: + nullstr[tptr] = nullstr[d]; + parent[d] = tptr; + break; + case S2FINAL: + nullstr[tptr] = TRUE; + break; +#ifdef DEBUG + case FINAL: + case S1FINAL: + break; + default: + warning("bad switch mn1 %d %d", a, d); + break; +#endif + } + return (tptr++); +} + +int +mn0(int a) +{ + if (tptr >= treesize) { + tptr++; + error("Parse tree too big %s", + (treesize == TREESIZE ? 
"\nTry using %e num" : "")); + } + + name[tptr] = a; + parent[tptr] = 0; + nullstr[tptr] = 0; + if (ISOPERATOR(a)) { + switch (a) { + case DOT: break; + case RNULLS: nullstr[tptr] = TRUE; break; +#ifdef DEBUG + default: + warning("bad switch mn0 %d", a); + break; +#endif + } + } + return (tptr++); +} + +void +munput(int t, CHR *p) +{ + int i, j; + if (t == 'c') { + *pushptr++ = peek; + peek = *p; + } else if (t == 's') { + *pushptr++ = peek; + peek = p[0]; + i = slength(p); + for (j = i - 1; j >= 1; j--) + *pushptr++ = p[j]; + } + if (pushptr >= pushc + TOKENSIZE) + error("Too many characters pushed"); +} + +int +dupl(int n) +{ + /* duplicate the subtree whose root is n, return ptr to it */ + int i; + i = name[n]; + if (!ISOPERATOR(i)) + return (mn0(i)); + switch (i) { + case DOT: + case RNULLS: + return (mn0(i)); + case RCCL: case RNCCL: case FINAL: case S1FINAL: case S2FINAL: + return (mn1(i, left[n])); + case STAR: case QUEST: case PLUS: case CARAT: + return (mn1(i, dupl(left[n]))); + + /* XCU4: add RXSCON */ + case RSTR: case RSCON: case RXSCON: + return (mn2(i, dupl(left[n]), right[n])); + case BAR: case RNEWE: case RCAT: case DIV: + return (mn2(i, dupl(left[n]), dupl(right[n]))); + } + return (0); +} + +#ifdef DEBUG +void +allprint(CHR c) +{ + switch (c) { + case 014: + printf("\\f"); + charc++; + break; + case '\n': + printf("\\n"); + charc++; + break; + case '\t': + printf("\\t"); + charc++; + break; + case '\b': + printf("\\b"); + charc++; + break; + case ' ': + printf("\\_"); + break; + default: + if (!iswprint(c)) { + printf("\\x%-2x", c); /* up to fashion. 
*/ + charc += 3; + } else + putwc(c, stdout); + break; + } + charc++; +} + +void +strpt(CHR *s) +{ + charc = 0; + while (*s) { + allprint(*s++); + if (charc > LINESIZE) { + charc = 0; + printf("\n\t"); + } + } +} + +void +sect1dump(void) +{ + int i; + printf("Sect 1:\n"); + if (def[0]) { + printf("str trans\n"); + i = -1; + while (def[++i]) + printf("%ls\t%ls\n", def[i], subs[i]); + } + if (sname[0]) { + printf("start names\n"); + i = -1; + while (sname[++i]) + printf("%ls\n", sname[i]); + } + if (chset == TRUE) { + printf("char set changed\n"); + for (i = 1; i < NCH; i++) { + if (i != ctable[i]) { + allprint(i); + putchar(' '); + iswprint(ctable[i]) ? + putwc(ctable[i], stdout) : + printf("%d", ctable[i]); + putchar('\n'); + } + } + } +} + +void +sect2dump(void) +{ + printf("Sect 2:\n"); + treedump(); +} + +void +treedump(void) +{ + int t; + CHR *p; + printf("treedump %d nodes:\n", tptr); + for (t = 0; t < tptr; t++) { + printf("%4d ", t); + parent[t] ? printf("p=%4d", parent[t]) : printf(" "); + printf(" "); + if (!ISOPERATOR(name[t])) { + allprint(name[t]); + } else + switch (name[t]) { + case RSTR: + printf("%ld ", (long)left[t]); + allprint(right[t]); + break; + case RCCL: + printf("ccl "); + strpt((CHR *)left[t]); + break; + case RNCCL: + printf("nccl "); + strpt((CHR *)left[t]); + break; + case DIV: + printf("/ %ld %ld", + (long)left[t], (long)right[t]); + break; + case BAR: + printf("| %ld %ld", + (long)left[t], (long)right[t]); + break; + case RCAT: + printf("cat %ld %ld", + (long)left[t], (long)right[t]); + break; + case PLUS: + printf("+ %ld", (long)left[t]); + break; + case STAR: + printf("* %ld", (long)left[t]); + break; + case CARAT: + printf("^ %ld", (long)left[t]); + break; + case QUEST: + printf("? 
%ld", (long)left[t]); + break; + case RNULLS: + printf("nullstring"); + break; + case FINAL: + printf("final %ld", (long)left[t]); + break; + case S1FINAL: + printf("s1final %ld", (long)left[t]); + break; + case S2FINAL: + printf("s2final %ld", (long)left[t]); + break; + case RNEWE: + printf("new %ld %ld", + (long)left[t], (long)right[t]); + break; + + /* XCU4: add RXSCON */ + case RXSCON: + p = (CHR *)right[t]; + printf("exstart %s", sname[*p++-1]); + while (*p) + printf(", %ls", sname[*p++-1]); + printf(" %ld", (long)left[t]); + break; + case RSCON: + p = (CHR *)right[t]; + printf("start %s", sname[*p++-1]); + while (*p) + printf(", %ls", sname[*p++-1]); + printf(" %ld", (long)left[t]); + break; + case DOT: + printf("dot"); + break; + default: + printf( + "unknown %d %ld %ld", name[t], + (long)left[t], (long)right[t]); + break; + } + if (nullstr[t]) + printf("\t(null poss.)"); + putchar('\n'); + } +} +#endif diff --git a/lex/sub2.c b/lex/sub2.c @@ -0,0 +1,1217 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. 
+ */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "sub2.c 6.15 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)sub2.c 1.7 (gritter) 01/12/07 + */ + +#include "ldefs.c" + +static void add(int **array, int n); +static void follow(int v); +static void first(int v); +static void nextstate(int s, int c); +static void packtrans(int st, CHR *tch, int *tst, int cnt, int tryit); +static void acompute(int s); +static void rprint(int *a, char *s, int n); +static void shiftr(int *a, int n); +static void upone(int *a, int n); +static void bprint(char *a, char *s, int n); +static int notin(int n); +static int member(int d, CHR *t); + +#ifdef PP +static void padd(int **array, int n); +#endif + +void +cfoll(int v) +{ + int i, j, k; + CHR *p; + i = name[v]; + if (!ISOPERATOR(i)) + i = 1; + switch (i) { + case 1: case RSTR: case RCCL: case RNCCL: case RNULLS: + for (j = 0; j < tptr; j++) + tmpstat[j] = FALSE; + count = 0; + follow(v); +#ifdef PP + padd(foll, v); /* packing version */ +#else + add(foll, v); /* no packing version */ +#endif + if (i == RSTR) + cfoll(left[v]); + else if (i == RCCL || i == RNCCL) { + for (j = 1; j < ncg; j++) + symbol[j] = (i == RNCCL); + p = (CHR *) left[v]; + while (*p) + symbol[*p++] = (i == RCCL); + p = pcptr; + for (j = 1; j < ncg; j++) + if (symbol[j]) { + for (k = 0; p + k < pcptr; k++) + if (cindex[j] == *(p + k)) + break; + if (p + k >= pcptr) + *pcptr++ = cindex[j]; + } + *pcptr++ = 0; + if (pcptr > pchar + pchlen) + error( + "Too many packed character classes"); + left[v] = (intptr_t)p; + name[v] = RCCL; /* RNCCL eliminated */ +#ifdef DEBUG + if (debug && *p) { + printf("ccl %d: %d", v, *p++); + while (*p) + printf(", %d", *p++); + putchar('\n'); + } +#endif + } + break; + case CARAT: + cfoll(left[v]); + break; + + /* XCU4: add RXSCON */ + case RXSCON: + + case STAR: case PLUS: case QUEST: case RSCON: + cfoll(left[v]); + break; + case BAR: 
case RCAT: case DIV: case RNEWE: + cfoll(left[v]); + cfoll(right[v]); + break; +#ifdef DEBUG + case FINAL: + case S1FINAL: + case S2FINAL: + break; + default: + warning("bad switch cfoll %d", v); +#endif + } +} + +#ifdef DEBUG +void +pfoll(void) +{ + int i, k, *p; + int j; + /* print sets of chars which may follow positions */ + printf("pos\tchars\n"); + for (i = 0; i < tptr; i++) + if (p = foll[i]) { + j = *p++; + if (j >= 1) { + printf("%d:\t%d", i, *p++); + for (k = 2; k <= j; k++) + printf(", %d", *p++); + putchar('\n'); + } + } +} +#endif + +static void +add(int **array, int n) +{ + int i, *temp; + CHR *ctemp; + temp = nxtpos; + ctemp = tmpstat; + array[n] = nxtpos; /* note no packing is done in positions */ + *temp++ = count; + for (i = 0; i < tptr; i++) + if (ctemp[i] == TRUE) + *temp++ = i; + nxtpos = temp; + if (nxtpos >= positions+maxpos) + error( + "Too many positions %s", + (maxpos == MAXPOS ? "\nTry using %p num" : "")); +} + +static void +follow(int v) +{ + int p; + if (v >= tptr-1) + return; + p = parent[v]; + if (p == 0) + return; + switch (name[p]) { + /* will not be CHAR RNULLS FINAL S1FINAL S2FINAL RCCL RNCCL */ + case RSTR: + if (tmpstat[p] == FALSE) { + count++; + tmpstat[p] = TRUE; + } + break; + case STAR: case PLUS: + first(v); + follow(p); + break; + case BAR: case QUEST: case RNEWE: + follow(p); + break; + case RCAT: case DIV: + if (v == left[p]) { + if (nullstr[right[p]]) + follow(p); + first(right[p]); + } + else + follow(p); + break; + /* XCU4: add RXSCON */ + case RXSCON: + case RSCON: case CARAT: + follow(p); + break; +#ifdef DEBUG + default: + warning("bad switch follow %d", p); +#endif + } +} + +/* + * Check if I have a RXSCON in my upper node + */ +static int +check_me(int v) +{ + int tmp = parent[v]; + + while (name[tmp] != RNEWE) { + if (name[tmp] == RXSCON) + return (1); + tmp = parent[tmp]; + } + return (0); +} + +/* calculate set of positions with v as root which can be active initially */ +static void +first(int v) +{ + int 
i; + CHR *p; + i = name[v]; + if (!ISOPERATOR(i)) + i = 1; /* every plain character is handled by the case 1 arm */ + switch (i) { + case 1: case RCCL: case RNCCL: + case RNULLS: case FINAL: + case S1FINAL: case S2FINAL: + /* + * XCU4: if we are working on an exclusive start state and + * the parent of this position is not RXSCON or RSTR this + * is not an active position. + * + * (There is a possibility that RXSCON appears as the + * (parent)* node. Check it by check_me().) + */ + if ((exclusive[stnum/2]) && + ISOPERATOR(name[parent[v]]) && + (name[parent[v]] != RXSCON) && + (name[parent[v]] != RSTR) && + (check_me(v) == 0)) { + break; + } + if (tmpstat[v] == FALSE) { /* count each position only once */ + count++; + tmpstat[v] = TRUE; + } + break; + case BAR: case RNEWE: + first(left[v]); + first(right[v]); + break; + case CARAT: + if (stnum % 2 == 1) /* odd stnum = must be at line begin (see cgoto) */ + first(left[v]); + break; + /* XCU4: add RXSCON */ + case RXSCON: + case RSCON: + i = stnum/2 +1; /* value looked for in the 0-terminated list at right[v] */ + p = (CHR *) right[v]; + while (*p) + if (*p++ == i) { + first(left[v]); + break; + } + break; + case STAR: case QUEST: + case PLUS: case RSTR: + /* + * XCU4: if we are working on an exclusive start state and + * the parent of this position is not RXSCON or RSTR this + * is not an active position. + * + * (There is a possibility that RXSCON appears as the + * (parent)* node. Check it by check_me().) 
+ */ + if ((exclusive[stnum/2]) && + ISOPERATOR(name[parent[v]]) && + (name[parent[v]] != RXSCON) && + (name[parent[v]] != RSTR) && + (check_me(v) == 0)) { + break; + } + first(left[v]); + break; + case RCAT: case DIV: + first(left[v]); + if (nullstr[left[v]]) /* presumably: left side can match empty, so the right side may start too - confirm */ + first(right[v]); + break; +#ifdef DEBUG + default: + warning("bad switch first %d", v); +#endif + } +} + +void +cgoto(void) /* builds the initial states, then the successor states and their packed transitions */ +{ + int i, j; + static int s; + int npos, curpos, n; + int tryit; + CHR tch[MAXNCG]; + int tst[MAXNCG]; + CHR *q; + /* generate initial state, for each start condition */ + if (ratfor) { + fprintf(fout, "blockdata\n"); + fprintf(fout, "common /Lvstop/ vstop\n"); + fprintf(fout, "define Svstop %d\n", nstates+1); + fprintf(fout, "integer vstop(Svstop)\n"); + } else + fprintf(fout, "int yyvstop[] = {\n0,\n"); + while (stnum < 2 || stnum/2 < sptr) { /* stnum/2 selects the start condition; two states are built for each */ + for (i = 0; i < tptr; i++) + tmpstat[i] = 0; + count = 0; + if (tptr > 0) + first(tptr-1); /* collect the start positions under node tptr-1 into tmpstat/count */ + add(state, stnum); /* store the collected set as state stnum */ +#ifdef DEBUG + if (debug) { + if (stnum > 1) + printf("%ls:\n", sname[stnum/2]); + pstate(stnum); + } +#endif + stnum++; + } + stnum--; /* back to the number of the last state built */ + /* even stnum = might not be at line begin */ + /* odd stnum = must be at line begin */ + /* even states can occur anywhere, odd states only at line begin */ + for (s = 0; s <= stnum; s++) { /* stnum grows inside this loop as new states are added */ + tryit = FALSE; + cpackflg[s] = FALSE; + sfall[s] = -1; + acompute(s); /* writes the action list for state s */ + for (i = 0; i < ncg; i++) + symbol[i] = 0; + npos = *state[s]; /* first word of a state is its position count */ + for (i = 1; i <= npos; i++) { + curpos = *(state[s]+i); + if (!ISOPERATOR(name[curpos])) + symbol[name[curpos]] = TRUE; + else { + switch (name[curpos]) { + case RCCL: + tryit = TRUE; + q = (CHR *)left[curpos]; + while (*q) { + for (j = 1; j < ncg; j++) + if (cindex[j] == *q) + symbol[j] = TRUE; + q++; + } + break; + case RSTR: + symbol[right[curpos]] = TRUE; + break; +#ifdef DEBUG + case RNULLS: + case FINAL: + case S1FINAL: + case S2FINAL: + break; + default: + warning( + "bad switch cgoto %d state %d", + curpos, s); + break; +#endif + } + } + } +#ifdef DEBUG + if 
(debug) { + printf("State %d transitions on char-group {", s); + charc = 0; + for (i = 1; i < ncg; i++) { + if (symbol[i]) { + printf("%d,", i); + } + if (i == ncg-1) + printf("}\n"); + if (charc > LINESIZE/4) { + charc = 0; + printf("\n\t"); + } + } + } +#endif + /* for each char, calculate next state */ + n = 0; + for (i = 1; i < ncg; i++) { + if (symbol[i]) { + /* executed for each state, transition pair */ + nextstate(s, i); + xstate = notin(stnum); + if (xstate == -2) + warning("bad state %d %o", s, i); + else if (xstate == -1) { + if (stnum+1 >= nstates) { + stnum++; + error("Too many states %s", + (nstates == NSTATES ? + "\nTry using %n num":"")); + } + add(state, ++stnum); +#ifdef DEBUG + if (debug) + pstate(stnum); +#endif + tch[n] = i; + tst[n++] = stnum; + } else { /* xstate >= 0 ==> state exists */ + tch[n] = i; + tst[n++] = xstate; + } + } + } + tch[n] = 0; + tst[n] = -1; + /* pack transitions into permanent array */ + if (n > 0) + packtrans(s, tch, tst, n, tryit); + else + gotof[s] = -1; + } + ratfor ? fprintf(fout, "end\n") : fprintf(fout, "0};\n"); +} + +/* + * Beware -- 70% of total CPU time is spent in this subroutine - + * if you don't believe me - try it yourself ! 
+ */ +static void +nextstate(int s, int c) +{ + int j, *newpos; + CHR *temp, *tz; + int *pos, i, *f, num, curpos, number; + /* state to goto from state s on char c */ + num = *state[s]; + temp = tmpstat; + pos = state[s] + 1; + for (i = 0; i < num; i++) { + curpos = *pos++; + j = name[curpos]; + if ((!ISOPERATOR(j)) && j == c || + j == RSTR && c == right[curpos] || + j == RCCL && member(c, (CHR *) left[curpos])) { + f = foll[curpos]; + number = *f; + newpos = f+1; + for (j = 0; j < number; j++) + temp[*newpos++] = 2; + } + } + j = 0; + tz = temp + tptr; + while (temp < tz) { + if (*temp == 2) { + j++; + *temp++ = 1; + } + else + *temp++ = 0; + } + count = j; +} + +static int +notin(int n) +{ /* see if tmpstat occurs previously */ + int *j, k; + CHR *temp; + int i; + if (count == 0) + return (-2); + temp = tmpstat; + for (i = n; i >= 0; i--) { /* for each state */ + j = state[i]; + if (count == *j++) { + for (k = 0; k < count; k++) + if (!temp[*j++]) + break; + if (k >= count) + return (i); + } + } + return (-1); +} + +static void +packtrans(int st, CHR *tch, int *tst, int cnt, int tryit) +{ + /* + * pack transitions into nchar, nexts + * nchar is terminated by '\0', nexts uses cnt, followed by elements + * gotof[st] = index into nchr, nexts for state st + * sfall[st] = t implies t is fall back state for st + * == -1 implies no fall back + */ + + int cmin, cval, tcnt, diff, p, *ast; + int i, j, k; + CHR *ach; + int go[MAXNCG], temp[MAXNCG], index, c; + int swork[MAXNCG]; + CHR cwork[MAXNCG]; + int upper; + + rcount += (long)cnt; + cmin = -1; + cval = ncg; + ast = tst; + ach = tch; + /* try to pack transitions using ccl's */ + if (!optim) + goto nopack; /* skip all compaction */ + if (tryit) { /* ccl's used */ + for (i = 1; i < ncg; i++) { + go[i] = temp[i] = -1; + symbol[i] = 1; + } + for (i = 0; i < cnt; i++) { + index = (unsigned char) tch[i]; + if ((index >= 0) && (index < NCH)) { + go[index] = tst[i]; + symbol[index] = 0; + } else { + fprintf(stderr, 
+"lex`sub2`packtran: tch[%d] out of bounds (%ld)\n", + i, (long)tch[i]); + } + } + for (i = 0; i < cnt; i++) { + c = match[tch[i]]; + if (go[c] != tst[i] || c == tch[i]) + temp[tch[i]] = tst[i]; + } + /* fill in error entries */ + for (i = 1; i < ncg; i++) + if (symbol[i]) + temp[i] = -2; /* error trans */ + /* count them */ + k = 0; + for (i = 1; i < ncg; i++) + if (temp[i] != -1) + k++; + if (k < cnt) { /* compress by char */ +#ifdef DEBUG + if (debug) + printf( + "use compression %d, %d vs %d\n", st, k, cnt); +#endif + k = 0; + for (i = 1; i < ncg; i++) + if (temp[i] != -1) { + cwork[k] = i; + swork[k++] = + (temp[i] == -2 ? -1 : temp[i]); + } + cwork[k] = 0; +#ifdef PC + ach = cwork; + ast = swork; + cnt = k; + cpackflg[st] = TRUE; +#endif + } + } + /* + * get most similar state + * reject state with more transitions, + * state already represented by a third state, + * and state which is compressed by char if ours is not to be + */ + for (i = 0; i < st; i++) { + if (sfall[i] != -1) + continue; + if (cpackflg[st] == 1) + if (!(cpackflg[i] == 1)) + continue; + p = gotof[i]; + if (p == -1) /* no transitions */ + continue; + tcnt = nexts[p]; + if (tcnt > cnt) + continue; + diff = 0; + k = 0; + j = 0; + upper = p + tcnt; + while (ach[j] && p < upper) { + while (ach[j] < nchar[p] && ach[j]) { + diff++; + j++; + } + if (ach[j] == 0) + break; + if (ach[j] > nchar[p]) { + diff = ncg; + break; + } + /* ach[j] == nchar[p] */ + if (ast[j] != nexts[++p] || + ast[j] == -1 || + (cpackflg[st] && ach[j] != match[ach[j]])) + diff++; + j++; + } + while (ach[j]) { + diff++; + j++; + } + if (p < upper) + diff = ncg; + if (diff < cval && diff < tcnt) { + cval = diff; + cmin = i; + if (cval == 0) + break; + } + } + /* cmin = state "most like" state st */ +#ifdef DEBUG + if (debug) + printf("select st %d for st %d diff %d\n", + cmin, st, cval); +#endif +#ifdef PS + if (cmin != -1) { /* if we can use st cmin */ + gotof[st] = nptr; + k = 0; + sfall[st] = cmin; + p = gotof[cmin] + 1; + j 
= 0; + while (ach[j]) { + /* if cmin has a transition on c, then so will st */ + /* st may be "larger" than cmin, however */ + while (ach[j] < nchar[p-1] && ach[j]) { + k++; + nchar[nptr] = ach[j]; + nexts[++nptr] = ast[j]; + j++; + } + if (nchar[p-1] == 0) + break; + if (ach[j] > nchar[p-1]) { + warning("bad transition %d %d", st, cmin); + goto nopack; + } + /* ach[j] == nchar[p-1] */ + if (ast[j] != nexts[p] || + ast[j] == -1 || + (cpackflg[st] && ach[j] != match[ach[j]])) { + k++; + nchar[nptr] = ach[j]; + nexts[++nptr] = ast[j]; + } + p++; + j++; + } + while (ach[j]) { + nchar[nptr] = ach[j]; + nexts[++nptr] = ast[j++]; + k++; + } + nexts[gotof[st]] = cnt = k; + nchar[nptr++] = 0; + } else { +#endif +nopack: + /* stick it in */ + gotof[st] = nptr; + nexts[nptr] = cnt; + for (i = 0; i < cnt; i++) { + nchar[nptr] = ach[i]; + nexts[++nptr] = ast[i]; + } + nchar[nptr++] = 0; +#ifdef PS + } +#endif + if (cnt < 1) { + gotof[st] = -1; + nptr--; + } else + if (nptr > ntrans) + error( + "Too many transitions %s", + (ntrans == NTRANS ? 
"\nTry using %a num" : "")); +} + +#ifdef DEBUG +void +pstate(int s) +{ + int *p, i, j; + printf("State %d:\n", s); + p = state[s]; + i = *p++; + if (i == 0) + return; + printf("%4d", *p++); + for (j = 1; j < i; j++) { + printf(", %4d", *p++); + if (j%30 == 0) + putchar('\n'); + } + putchar('\n'); +} +#endif + +static int +member(int d, CHR *t) +{ + int c; + CHR *s; + c = d; + s = t; + c = cindex[c]; + while (*s) + if (*s++ == c) + return (1); + return (0); +} + +#ifdef DEBUG +void +stprt(int i) +{ + int p, t; + printf("State %d:", i); + /* print actions, if any */ + t = atable[i]; + if (t != -1) + printf(" final"); + putchar('\n'); + if (cpackflg[i] == TRUE) + printf("backup char in use\n"); + if (sfall[i] != -1) + printf("fall back state %d\n", sfall[i]); + p = gotof[i]; + if (p == -1) + return; + printf("(%d transitions)\n", nexts[p]); + while (nchar[p]) { + charc = 0; + if (nexts[p+1] >= 0) + printf("%d\t", nexts[p+1]); + else + printf("err\t"); + allprint(nchar[p++]); + while (nexts[p] == nexts[p+1] && nchar[p]) { + if (charc > LINESIZE) { + charc = 0; + printf("\n\t"); + } + allprint(nchar[p++]); + } + putchar('\n'); + } + putchar('\n'); +} +#endif + +/* compute action list = set of poss. 
actions */ +static void +acompute(int s) +{ + int *p, i, j; + int q, r; + int cnt, m; + int temp[MAXPOSSTATE], k, neg[MAXPOSSTATE], n; + k = 0; + n = 0; + p = state[s]; + cnt = *p++; + if (cnt > MAXPOSSTATE) + error("Too many positions for one state - acompute"); + for (i = 0; i < cnt; i++) { + q = *p; + if (name[q] == FINAL) + temp[k++] = left[q]; + else if (name[q] == S1FINAL) { + temp[k++] = left[q]; + if ((r = left[q]) >= NACTIONS) + error( + "INTERNAL ERROR:left[%d]==%d>=NACTIONS", q, r); + extra[r] = 1; + } else if (name[q] == S2FINAL) + neg[n++] = left[q]; + p++; + } + atable[s] = -1; + if (k < 1 && n < 1) + return; +#ifdef DEBUG + if (debug) + printf("final %d actions:", s); +#endif + /* sort action list */ + for (i = 0; i < k; i++) + for (j = i+1; j < k; j++) + if (temp[j] < temp[i]) { + m = temp[j]; + temp[j] = temp[i]; + temp[i] = m; + } + /* remove dups */ + for (i = 0; i < k-1; i++) + if (temp[i] == temp[i+1]) + temp[i] = 0; + /* copy to permanent quarters */ + atable[s] = aptr; +#ifdef DEBUG + if (!ratfor) + fprintf(fout, "/* actions for state %d */", s); +#endif + putc('\n', fout); + for (i = 0; i < k; i++) + if (temp[i] != 0) { + ratfor ? + fprintf(fout, "data vstop(%d)/%d/\n", aptr, temp[i]) : + fprintf(fout, "%d,\n", temp[i]); +#ifdef DEBUG + if (debug) + printf("%d ", temp[i]); +#endif + aptr++; + } + for (i = 0; i < n; i++) { /* copy fall back actions - all neg */ + ratfor ? + fprintf(fout, "data vstop(%d)/%d/\n", aptr, neg[i]) : + fprintf(fout, "%d,\n", neg[i]); + aptr++; +#ifdef DEBUG + if (debug) + printf("%d ", neg[i]); +#endif + } +#ifdef DEBUG + if (debug) + putchar('\n'); +#endif + ratfor ? 
fprintf(fout, "data vstop (%d)/0/\n", aptr) : + fprintf(fout, "0, \n"); + aptr++; +} + +#ifdef DEBUG +void +pccl(void) +{ + /* print character class sets */ + int i, j; + printf("char class intersection\n"); + for (i = 0; i < ccount; i++) { + charc = 0; + printf("class %d:\n\t", i); + for (j = 1; j < ncg; j++) + if (cindex[j] == i) { + allprint(j); + if (charc > LINESIZE) { + printf("\n\t"); + charc = 0; + } + } + putchar('\n'); + } + charc = 0; + printf("match:\n"); + for (i = 0; i < ncg; i++) { + allprint(match[i]); + if (charc > LINESIZE) { + putchar('\n'); + charc = 0; + } + } + putchar('\n'); +} +#endif + +void +mkmatch(void) +{ + int i; + CHR tab[MAXNCG]; + for (i = 0; i < ccount; i++) + tab[i] = 0; + for (i = 1; i < ncg; i++) + if (tab[cindex[i]] == 0) + tab[cindex[i]] = i; + /* tab[i] = principal char for new ccl i */ + for (i = 1; i < ncg; i++) + match[i] = tab[cindex[i]]; +} + +void +layout(void) +{ + /* format and output final program's tables */ + int i, j, k; + int top, bot, startup, omin; + startup = 0; + for (i = 0; i < outsize; i++) + verify[i] = advance[i] = 0; + omin = 0; + yytop = 0; + for (i = 0; i <= stnum; i++) { /* for each state */ + j = gotof[i]; + if (j == -1) { + stoff[i] = 0; + continue; + } + bot = j; + while (nchar[j]) + j++; + top = j - 1; +#if DEBUG + if (debug) { + printf("State %d: (layout)\n", i); + for (j = bot; j <= top; j++) { + printf(" %o", nchar[j]); + if (j % 10 == 0) + putchar('\n'); + } + putchar('\n'); + } +#endif + while (verify[omin+ZCH]) + omin++; + startup = omin; +#if DEBUG + if (debug) + printf( + "bot,top %d, %d startup begins %d\n", + bot, top, startup); +#endif + if (chset) { + do { + startup += 1; + if (startup > outsize - ZCH) + error("output table overflow"); + for (j = bot; j <= top; j++) { + k = startup+ctable[nchar[j]]; + if (verify[k]) + break; + } + } while (j <= top); +#if DEBUG + if (debug) + printf(" startup will be %d\n", startup); +#endif + /* have found place */ + for (j = bot; j <= top; j++) { + k 
= startup + ctable[nchar[j]]; + if (ctable[nchar[j]] <= 0) + printf( + "j %d nchar %ld ctable.nch %d\n", + j, (long)nchar[j], ctable[nchar[k]]); + verify[k] = i + 1; /* state number + 1 */ + advance[k] = nexts[j+1]+1; + if (yytop < k) + yytop = k; + } + } else { + do { + startup += 1; + if (startup > outsize - ZCH) + error("output table overflow"); + for (j = bot; j <= top; j++) { + k = startup + nchar[j]; + if (verify[k]) + break; + } + } while (j <= top); + /* have found place */ +#if DEBUG + if (debug) + printf(" startup going to be %d\n", startup); +#endif + for (j = bot; j <= top; j++) { + k = startup + nchar[j]; + verify[k] = i+1; /* state number + 1 */ + advance[k] = nexts[j+1] + 1; + if (yytop < k) + yytop = k; + } + } + stoff[i] = startup; + } + + /* stoff[i] = offset into verify, advance for trans for state i */ + /* put out yywork */ + if (ratfor) { + fprintf(fout, "define YYTOPVAL %d\n", yytop); + rprint(verify, "verif", yytop+1); + rprint(advance, "advan", yytop+1); + shiftr(stoff, stnum); + rprint(stoff, "stoff", stnum+1); + shiftr(sfall, stnum); + upone(sfall, stnum+1); + rprint(sfall, "fall", stnum+1); + bprint(extra, "extra", casecount+1); + bprint((char *)match, "match", ncg); + shiftr(atable, stnum); + rprint(atable, "atable", stnum+1); + } + fprintf(fout, + "# define YYTYPE %s\n", stnum+1 >= NCH ? 
"int" : "unsigned char"); + fprintf(fout, + "struct yywork { YYTYPE verify, advance; } yycrank[] = {\n"); + for (i = 0; i <= yytop; i += 4) { + for (j = 0; j < 4; j++) { + k = i+j; + if (verify[k]) + fprintf(fout, + "{ %d,%d },\t", verify[k], advance[k]); + else + fprintf(fout, "{ 0,0 },\t"); + } + putc('\n', fout); + } + fprintf(fout, "{0,0}};\n"); + + /* put out yysvec */ + + fprintf(fout, "struct yysvf yysvec[] = {\n"); + fprintf(fout, "{ 0,\t0,\t0 },\n"); + for (i = 0; i <= stnum; i++) { /* for each state */ + if (cpackflg[i]) + stoff[i] = -stoff[i]; + fprintf(fout, "{ yycrank+%d,\t", stoff[i]); + if (sfall[i] != -1) + fprintf(fout, + "yysvec+%d,\t", sfall[i]+1); /* state + 1 */ + else + fprintf(fout, "0,\t\t"); + if (atable[i] != -1) + fprintf(fout, "yyvstop+%d", atable[i]); + else + fprintf(fout, "0"); +#ifdef DEBUG + fprintf(fout, " },\t\t/* state %d */", i); +#endif + fprintf(fout, " },\t\t/* state %d */", i); + } + fprintf(fout, "{ 0,\t0,\t0}};\n"); + + /* put out yymatch */ + + fprintf(fout, "struct yywork *yytop = yycrank+%d;\n", yytop); + fprintf(fout, "struct yysvf *yybgin = yysvec+1;\n"); + if (optim) { + if (handleeuc) { + fprintf(fout, "int yymatch[] = {\n"); + } else { + fprintf(fout, "char yymatch[] = {\n"); + } + if (chset == 0) { /* no chset, put out in normal order */ + for (i = 0; i < ncg; i += 8) { + for (j = 0; j < 8; j++) { + int fbch; + fbch = match[i+j]; + fprintf(fout, "%3d, ", fbch); + } + putc('\n', fout); + } + } else { + int *fbarr; + fbarr = myalloc(2*MAXNCG, sizeof (*fbarr)); + if (fbarr == 0) + error("No space for char table reverse", 0); + for (i = 0; i < MAXNCG; i++) + fbarr[i] = 0; + for (i = 0; i < ncg; i++) + fbarr[ctable[i]] = ctable[match[i]]; + for (i = 0; i < ncg; i += 8) { + for (j = 0; j < 8; j++) + fprintf(fout, "0%-3o,", fbarr[i+j]); + putc('\n', fout); + } + free(fbarr); + } + fprintf(fout, "0};\n"); + } + /* put out yyextra */ + fprintf(fout, "char yyextra[] = {\n"); + for (i = 0; i < casecount; i += 8) { + for (j = 
0; j < 8; j++) + fprintf(fout, "%d,", i+j < NACTIONS ? + extra[i+j] : 0); + putc('\n', fout); + } + fprintf(fout, "0};\n"); + if (handleeuc) { + /* Put out yycgidtbl */ + fprintf(fout, "#define YYNCGIDTBL %d\n", ncgidtbl); + fprintf(fout, "\tunsigned long yycgidtbl[]={"); + /* + * Use "unsigned long" instead of "lchar" to minimize + * the name-space pollution for the application program. + */ + for (i = 0; i < ncgidtbl; ++i) { + if (i%8 == 0) + fprintf(fout, "\n\t\t"); + fprintf(fout, "0x%08lx, ", yycgidtbl[i]); + } + fprintf(fout, "\n\t0};\n"); + } +} + +static void +rprint(int *a, char *s, int n) /* ratfor output: writes a[1..n] as a block data unit named s, eight items per line */ +{ + int i; + fprintf(fout, "block data\n"); + fprintf(fout, "common /L%s/ %s\n", s, s); + fprintf(fout, "define S%s %d\n", s, n); + fprintf(fout, "integer %s (S%s)\n", s, s); + for (i = 1; i <= n; i++) { + if (i%8 == 1) + fprintf(fout, "data "); + fprintf(fout, "%s (%d)/%d/", s, i, a[i]); + fprintf(fout, (i%8 && i < n) ? ", " : "\n"); /* end the line after every eighth item and after the last */ + } + fprintf(fout, "end\n"); +} + +static void +shiftr(int *a, int n) /* moves a[0..n] up to a[1..n+1]; the caller must provide room for a[n+1] */ +{ + int i; + for (i = n; i >= 0; i--) /* run downward so no element is overwritten before it is copied */ + a[i+1] = a[i]; +} + +static void +upone(int *a, int n) /* adds one to each of a[0..n] */ +{ + int i; + for (i = 0; i <= n; i++) + a[i]++; +} + +static void +bprint(char *a, char *s, int n) /* ratfor output for a char array: writes a[1..n-1], up to eight items per data line */ +{ + int i, j, k; + fprintf(fout, "block data\n"); + fprintf(fout, "common /L%s/ %s\n", s, s); + fprintf(fout, "define S%s %d\n", s, n); + fprintf(fout, "integer %s (S%s)\n", s, s); + for (i = 1; i < n; i += 8) { + fprintf(fout, "data %s (%d)/%d/", s, i, a[i]); + for (j = 1; j < 8; j++) { + k = i+j; + if (k < n) + fprintf(fout, ", %s (%d)/%d/", s, k, a[k]); + } + putc('\n', fout); + } + fprintf(fout, "end\n"); +} + +#ifdef PP +static void +padd(int **array, int n) /* packing form of add(): reuse an earlier identical set instead of storing a new one */ +{ + int i, *j, k; + array[n] = nxtpos; + if (count == 0) { /* empty set: store only a zero count */ + *nxtpos++ = 0; + return; + } + for (i = tptr-1; i >= 0; i--) { + j = array[i]; + if (j && *j++ == count) { /* same size: compare the members against tmpstat */ + for (k = 0; k < count; k++) + if (!tmpstat[*j++]) + break; + if (k >= count) { /* every member is marked, so the sets are equal: share it */ + array[n] = array[i]; + return; + } + } + } + add(array, n); +} 
+#endif diff --git a/lex/sub3.c b/lex/sub3.c @@ -0,0 +1,395 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. + */ + +/* from OpenSolaris "sub3.c 1.8 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)sub3.c 1.4 (gritter) 11/26/05 + */ + +/* + * sub3.c ... ALE enhancement. + * Since a typical Asian language has a huge character set, it is not + * ideal to index an array by the character code itself, which requires + * as many as 2**16 entries per array. + * To get around this problem, we identify a set of characters that + * cause the same transition in all states and call it a character group. + * All characters in the same character group share one number, called the + * character group id. A function yycgid(c) maps the character c (in process + * code) to the id. This mapping is determined by analyzing all regular + * expressions in the lex program. 
+ * + */ +#include <stdlib.h> +#ifdef __sun +#include <widec.h> +#endif +#include "search.h" +#include "ldefs.c" +#include <ctype.h> + +/* + * "lchar" stands for linearized character. It is a variant of + * process code. AT&T's 16-bit process code has a drawback in which + * for three three process code C, D and E where C <= D <= E, + * codeset(C)==codeset(E) does not mean codeset(D)==codeset(C). + * In other words, four codesets alternates as the magnitude + * of character increases. + * The lchar representation holds this property: + * If three lchar C', D' and E' have the relationship C' < D' < E' and + * codeset(C') == codeset(E') then D' is guaranteed to belong to + * the same codeset as C' and E'. + * lchar is implemented as 32 bit entities and the function linearize() + * that maps a wchar_t to lchar is defined below. There is no + * reverse function for it though. + * The 32-bit process code by AT&T, used only for Taiwanese version at the + * time of wrting, has no such problem and we use it as it is. + */ + +lchar yycgidtbl[MAXNCG] = { + 0, /* For ease of computation of the id. */ + '\n', /* Newline is always special because '.' exclude it. */ + 0x000000ff, /* The upper limit of codeset 0. */ + 0x20ffffff, /* The upper limit of codeset 2. */ + 0x40ffffff /* The upper limit of codeset 3. */ +/* 0x60ffffff The upper limit of codeset 1. */ + /* Above assumes the number of significant bits of wchar_t is <= 24. */ +}; +int ncgidtbl = 5; /* # elements in yycgidtbl. */ +int ncg; /* Should set to ncgidtbl*2; this is the largest value yycgid() */ + /* returns plus 1. */ + +static void setsymbol(int i); + +/* + * For given 16-bit wchar_t (See NOTE), lchar is computed as illustrated below: + * + * wc: axxxxxxbyyyyyyy + * + * returns: 0ab0000000000000axxxxxxxbyyyyyyy + * + * linearize() doesn't do any if compiled with 32-bit wchar_t, use of + * which is flagged with LONG_WCHAR_T macro. 
+ * NOTE: + * The implementation is highly depends on the process code representation. + * This function should be modified when 32-bit process code is used. + * There is no need to keep 'a' and 'b' bits in the lower half of lchar. + * You can actually omit these and squeeze the xxxxxx part one bit right. + * We don't do that here just in sake of speed. + */ +lchar +linearize(wchar_t wc) +{ +#ifdef LONG_WCHAR_T + return ((lchar)wc); /* Don't do anything. */ +#else + + lchar prefix; + switch (wc&0x8080) { + case 0x0000: prefix = 0x00000000; break; + case 0x0080: prefix = 0x20000000; break; + case 0x8000: prefix = 0x40000000; break; + case 0x8080: prefix = 0x60000000; break; + } + return (prefix|wc); +#endif +} + +/* compare liniear characters pointed to by pc1 and pc2 */ +int +cmplc(const void *arg1, const void *arg2) +{ + lchar *pc1 = (lchar *)arg1; + lchar *pc2 = (lchar *)arg2; + + if (*pc1 > *pc2) + return (1); + else if (*pc1 == *pc2) + return (0); + else + return (-1); +} + +void +remch(wchar_t c) +{ + lchar lc = linearize(c); + + /* + * User-friendliness consideration: + * Make sure no EUC chars are used in reg. exp. + */ + if (!handleeuc) { + if (!isascii(c)) + if (iswprint(c)) + warning( +"Non-ASCII character '%lc' in pattern; use -w or -e lex option.", c); + else warning( +"Non-ASCII character of value %#x in pattern; use -w or -e lex option.", c); + /* In any case, we don't need to construct ncgidtbl[]. */ + return; + } + + xlsearch(&lc, yycgidtbl, + (unsigned *)&ncgidtbl, sizeof (lchar), cmplc); +} + +void +sortcgidtbl(void) +{ + if (!handleeuc) + return; + qsort(yycgidtbl, ncgidtbl, sizeof (lchar), cmplc); +} + +/* + * int yycgid(wchar_t c) + * Takes c and returns its character group id, determind by the + * following algorithm. The program also uses the binary search + * algorithm, generalized from Knuth (6.2.1) Algorithm B. 
+ * + * This function computes the "character group id" based on + * a table yycgidtbl of which each lchar entry is pre-sorted + * in ascending sequence The number of valid entries is given + * by YYNCGIDTBL. There is no duplicate entries in yycgidtbl. + * const int YYNCGIDTBL; + * lchar yycgidtbl[YYNCGIDTBL]; + * + * yycgidtbl[0] is guaranteed to have zero. + * + * For given c, yycgid(c) returns: + * 2*i iff yycgidtbl[i] == lc + * 2*i+1 iff yycgidtbl[i] < lc < yycgidtbl[i+1] + * YYNCGIDTBL*2-1 + * iff yycgidtbl[YYNCGIDTBL-1] < lc + * where lc=linearize(c). + * + * Some interesting properties.: + * 1. For any c, 0 <= yycgid(c) <= 2*YYNCGIDTBL-1 + * 2. yycgid(c) == 0 iff c == 0. + * 3. For any wchar_t c and d, if linearize(c) < linearize(d) then + * yycgid(c) <= yycgid(d). + * 4. For any wchar_t c and d, if yycgid(c) < yycgid(d) then + * linearize(c) < linearize(d). + */ +#define YYNCGIDTBL ncgidtbl + +int +yycgid(wchar_t c) +{ + int first = 0; + int last = YYNCGIDTBL - 1; + lchar lc; + + /* + * In ASCII compat. mode, each character forms a "group" and the + * group-id is itself... + */ + if (!handleeuc) + return (c); + + lc = linearize(c); + + /* An exceptional case: yycgidtbl[YYNCGIDTBL-1] < lc */ + if (yycgidtbl[YYNCGIDTBL - 1] < lc) + return (YYNCGIDTBL*2 - 1); + + while (last >= 0) { + int i = (first+last)/2; + if (lc == yycgidtbl[i]) + return (2*i); /* lc exactly matches an element. */ + else if (yycgidtbl[i] < lc) { + if (lc < yycgidtbl[i+1]) + return (2*i+1); /* lc is in between two elements. */ + else + first = i + 1; + } else + last = i - 1; + } + error( + "system error in yycgid():binary search failed for c=0x%04x\n", c); + return (0); +} + +/* + * repbycgid --- replaces each character in the parsing tree by its + * character group id. This, however, should be called even in + * the ASCII compat. mode to process DOT nodes and to call cclinter() + * for the DOT and CCL nodes. 
+ */ +void +repbycgid(void) +{ + int i, c; + + for (i = 0; i < tptr; ++i) { + c = name[i]; + if (!ISOPERATOR(c)) { + /* If not an operator, it must be a char. */ + name[i] = yycgid((wchar_t)c); /* So replace it. */ +#ifdef DEBUG + if (debug) { + printf("name[%d]:'%c'->%d;\n", i, c, name[i]); + } +#endif + } else if (c == RSTR) { + c = right[i]; + right[i] = yycgid((wchar_t)c); +#ifdef DEBUG + if (debug) { + printf( + "name[%d].right:'%c'->%d;\n", i, c, right[i]); + } +#endif + } else if ((c == RCCL) || (c == RNCCL)) { + CHR cc, *s; + int j; + CHR ccltoken[CCLSIZE]; + CHR *ccp; + int m; + /* + * This node represetns a character class RE [ccccc] + * s points to the string of characters that forms + * the class and/or a special prefix notation + * <RANGE>XY which corresponds to the RE X-Y, + * characters in the range of X and Y. Here, + * X <= Y is guranteed. + * We transform these characters into a string + * of sorted character group ids. + * + * There is another mechanism of packing tables + * that is inherited from the ASCII lex. Call of + * cclinter() is required for this packing. + * This used to be done as yylex() reads the lex + * rules but we have to do this here because the + * transition table is made to work on the char-group + * ids and the mapping cannot be determined until + * the entire file is read. + */ +#ifdef DEBUG + if (debug) { + printf("name[%d]:R[N]CCL of \"", i); + strpt((CHR *)left[i]); + printf(" -> {"); + } +#endif + /* Prepare symbol[] for cclinter(). */ + for (j = 0; j < ncg; ++j) + symbol[j] = FALSE; + + s = (CHR *) left[i]; + while (cc = *s++) { + if (cc == RANGE) { + int low, high, i; + /* + * Special form: <RANGE>XY + * This means the range X-Y. + * We mark all symbols[] + * elements for yycgid(X) thru + * yycgid(Y), inclusively. + */ + low = yycgid(*s++); + high = yycgid(*s++); + for (i = low; i <= high; ++i) + setsymbol(i); + } else { + setsymbol(yycgid(cc)); + } + } + + /* Now make a transformed string of cgids. 
*/ + s = ccptr; + m = 0; + for (j = 0; j < ncg; ++j) + if (symbol[j]) { + ccltoken[m++] = (CHR)j; +#ifdef DEBUG + if (debug) printf("%d, ", j); +#endif + } + +#ifdef DEBUG + if (debug) printf("}\n"); +#endif + ccltoken[m] = 0; + ccp = ccl; + while (ccp < ccptr && scomp(ccltoken, ccp) != 0) + ccp++; + if (ccp < ccptr) { /* character class found in ccl */ + left[i] = (intptr_t)ccp; + } else { /* not in ccl, add it */ + left[i] = (intptr_t)ccptr; + scopy(ccltoken, ccptr); + ccptr += slength(ccltoken) + 1; + if (ccptr > ccl + CCLSIZE) + error("Too many large character classes"); + } + cclinter(c == RCCL); + } else if (c == DOT) { + if (psave == 0) { /* First DOT node. */ + int j, nlid; + /* + * Make symbol[k]=TRUE for all k + * except k == yycgid('\n'). + */ + nlid = yycgid('\n'); + psave = ccptr; + for (j = 1; j < ncg; ++j) { + if (j == nlid) { + symbol[j] = FALSE; + } else { + symbol[j] = TRUE; + *ccptr++ = (CHR) j; + } + } + *ccptr++ = 0; + if (ccptr > ccl + CCLSIZE) + error("Too many large character classes"); + } + /* Mimic mn1(RCCL,psave)... */ + name[i] = RCCL; + left[i] = (intptr_t)psave; + cclinter(1); + } + } +#ifdef DEBUG + if (debug) { + printf("treedump after repbycgid().\n"); + treedump(); + } +#endif +} + +static void +setsymbol(int i) +{ + if (i > sizeof (symbol)) + error("setsymbol: (SYSERR) %d out of range", i); + symbol[i] = TRUE; +} diff --git a/lex/wcio.c b/lex/wcio.c @@ -0,0 +1,70 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)wcio.c 1.1 (gritter) 6/25/05 + */ +#include <stdlib.h> +#include <wchar.h> +#include <stdio.h> +#include <limits.h> +#include <errno.h> + +extern int error(char *, ...); + +/* + * This is like getwc() but issues an error message when an illegal + * byte sequence is encountered. + */ +wint_t +lex_getwc(FILE *fp) +{ + wint_t wc; + + if ((wc = getwc(fp)) != WEOF) + return wc; + if (ferror(fp) && errno == EILSEQ) + error("illegal byte sequence"); + return wc; +} + +/* + * A substitute for putwc(), to ensure that stdio output FILE objects + * are always byte-oriented. + */ +wint_t +lex_putwc(wchar_t wc, FILE *fp) +{ + char mb[MB_LEN_MAX]; + int i, n; + + if ((n = wctomb(mb, wc)) < 0) { + wctomb(mb, 0); + errno = EILSEQ; + return WEOF; + } + for (i = 0; i < n; i++) + if (putc(mb[i]&0377, fp) == EOF) + return WEOF; + return wc; +} diff --git a/lex/yyless.c b/lex/yyless.c @@ -0,0 +1,137 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* from OpenSolaris "yyless.c 6.14 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)yyless.c 1.6 (gritter) 11/27/05 + */ + +#include <stdlib.h> +#ifdef __sun +#include <sys/euc.h> +#include <widec.h> +#endif +#include <limits.h> +#include <inttypes.h> +#include <unistd.h> + +extern int yyprevious; + +#ifndef JLSLEX +#define CHR char + +extern CHR yytext[]; + +#define YYTEXT yytext +#define YYLENG yyleng +#define YYINPUT yyinput +#define YYUNPUT yyunput +#define YYOUTPUT yyoutput +#endif + +#ifdef WOPTION +#define CHR wchar_t + +extern CHR yytext[]; + +#define YYTEXT yytext +#define YYLENG yyleng +#define YYINPUT yyinput +#define YYUNPUT yyunput +#define YYOUTPUT yyoutput +#define yyless yyless_w +#endif + +#ifdef EOPTION +#define CHR wchar_t + +extern int yyleng; +extern CHR yytext[]; +extern CHR yywtext[]; + +#define YYTEXT yywtext +#define YYLENG yywleng +#define YYINPUT yywinput +#define YYUNPUT yywunput +#define YYOUTPUT yywoutput +#define yyless yyless_e +#endif + +extern int YYLENG; +#if defined(__STDC__) + extern void YYUNPUT(int); +#endif + +#if defined(__cplusplus) || defined(__STDC__) +/* XCU4: type of yyless() changes to int */ +int +yyless(int x) +#else +yyless(x) +int x; +#endif +{ + register CHR *lastch, *ptr; + + lastch = YYTEXT+YYLENG; + if (x >= 0 && x <= YYLENG) + ptr = x + YYTEXT; + else { + if (sizeof (int) != sizeof (intptr_t)) { + static int seen = 0; + + if (!seen) { + write(2, + "warning: yyless pointer arg truncated\n", 38); /* 38 == strlen of the message; the NUL is not written */ + seen = 1; + } + } + /* + * The cast on the 
next line papers over an unconscionable nonportable + * glitch to allow the caller to hand the function a pointer instead of + * an integer and hope that it gets figured out properly. But it's + * that way on all systems. + */ + ptr = (CHR *)(intptr_t)x; + } + while (lastch > ptr) + YYUNPUT(*--lastch); + *lastch = 0; + if (ptr > YYTEXT) + yyprevious = *--lastch; + YYLENG = ptr-YYTEXT; +#ifdef EOPTION + yyleng = wcstombs((char *)yytext, YYTEXT, YYLENG*MB_LEN_MAX); +#endif + return (0); +} diff --git a/lex/yywrap.c b/lex/yywrap.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1989 AT&T */ +/* All Rights Reserved */ + + +/* from OpenSolaris "yywrap.c 6.4 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)yywrap.c 1.3 (gritter) 6/18/05 + */ + +#if defined(__cplusplus) || defined(__STDC__) +int yywrap(void) +#else +yywrap() +#endif +{ + return(1); +} diff --git a/libcommon/CHECK.c b/libcommon/CHECK.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2004 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)CHECK.c 1.8 (gritter) 12/16/07 */ + +#include <stdlib.h> + +#ifdef __FreeBSD__ +#define NEED_ALLOCA_H 1 +#define NEED_MALLOC_H 1 +#define NEED_UTMPX_H 1 +#endif + +#ifdef __APPLE__ +#include <available.h> +#if __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_OS_X_VERSION_10_5 +#define NEED_ALLOCA_H 1 +#endif +#define NEED_MALLOC_H 1 +#define NEED_UTMPX_H 1 +#endif + +#ifdef __DragonFly__ +#define NEED_ALLOCA_H 1 +#define NEED_MALLOC_H 1 +#define NEED_UTMPX_H 1 +#endif + +#ifdef __OpenBSD__ +#define NEED_ALLOCA_H 1 +#define NEED_MALLOC_H 1 +#define NEED_UTMPX_H 1 +#endif + +#ifdef __NetBSD__ +#define NEED_ALLOCA_H 1 +#define NEED_MALLOC_H 1 +#define NEED_UTMPX_H 1 +#endif + +#ifdef __dietlibc__ +#define NEED_MALLOC_H 1 +#define NEED_UTMPX_H 1 +#endif + +#ifdef __UCLIBC__ +#define NEED_UTMPX_H 1 +#endif + +#ifndef NEED_ALLOCA_H +#define NEED_ALLOCA_H 0 +#endif + +#ifndef NEED_MALLOC_H +#define NEED_MALLOC_H 0 +#endif + +#ifndef NEED_UTMPX_H +#define NEED_UTMPX_H 0 +#endif + +int alloca_h = NEED_ALLOCA_H; +int malloc_h = NEED_MALLOC_H; +int utmpx_h = NEED_UTMPX_H; diff --git a/libcommon/_alloca.h b/libcommon/_alloca.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * 
This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)_alloca.h 1.5 (gritter) 1/22/06 */ + +#if defined (__FreeBSD__) || defined (__NetBSD__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) +#include <stdlib.h> +#endif /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ diff --git a/libcommon/_malloc.h b/libcommon/_malloc.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. 
Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)_malloc.h 1.2 (gritter) 5/1/04 */ + +#include <stdlib.h> + +extern void *memalign(size_t, size_t); diff --git a/libcommon/_utmpx.h b/libcommon/_utmpx.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2004 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ +/* Sccsid @(#)_utmpx.h 1.9 (gritter) 1/22/06 */ + +#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \ + defined (__UCLIBC__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) +#include <sys/types.h> +#include <sys/time.h> +#include <utmp.h> + +#ifndef __dietlibc__ +struct utmpx { + char ut_user[UT_NAMESIZE]; + char ut_id[UT_LINESIZE]; + char ut_line[UT_LINESIZE]; + char ut_host[UT_HOSTSIZE]; + pid_t ut_pid; + short ut_type; + struct timeval ut_tv; + struct { + int e_termination; + int e_exit; + } ut_exit; +}; + +#ifndef EMPTY +#define EMPTY 0 +#endif +#ifndef BOOT_TIME +#define BOOT_TIME 1 +#endif +#ifndef OLD_TIME +#define OLD_TIME 2 +#endif +#ifndef NEW_TIME +#define NEW_TIME 3 +#endif +#ifndef USER_PROCESS +#define USER_PROCESS 4 +#endif +#ifndef INIT_PROCESS +#define INIT_PROCESS 5 +#endif +#ifndef LOGIN_PROCESS +#define LOGIN_PROCESS 6 +#endif +#ifndef DEAD_PROCESS +#define DEAD_PROCESS 7 +#endif +#ifndef RUN_LVL +#define RUN_LVL 8 +#endif +#ifndef ACCOUNTING +#define ACCOUNTING 9 +#endif +#else /* __dietlibc__ */ +#define utmpx utmp +#endif /* __dietlibc__ */ + +extern void endutxent(void); +extern struct utmpx *getutxent(void); +extern struct utmpx *getutxid(const struct utmpx *); +extern struct utmpx *getutxline(const struct utmpx *); +extern struct utmpx *pututxline(const struct utmpx *); +extern void setutxent(void); +extern int utmpxname(const char *); +extern void updwtmpx(const char *, const struct utmpx *); +#endif /* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __UCLIBC__ || + __OpenBSD__ || __DragonFly__ || __APPLE__ */ diff --git a/libcommon/asciitype.c b/libcommon/asciitype.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)asciitype.c 1.4 (gritter) 4/17/03 */ + +#include "asciitype.h" + +const unsigned char class_char[] = { +/* 000 nul 001 soh 002 stx 003 etx 004 eot 005 enq 006 ack 007 bel */ + C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL, +/* 010 bs 011 ht 012 nl 013 vt 014 np 015 cr 016 so 017 si */ + C_CNTRL,C_BLANK,C_WHITE,C_SPACE,C_SPACE,C_SPACE,C_CNTRL,C_CNTRL, +/* 020 dle 021 dc1 022 dc2 023 dc3 024 dc4 025 nak 026 syn 027 etb */ + C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL, +/* 030 can 031 em 032 sub 033 esc 034 fs 035 gs 036 rs 037 us */ + C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL, +/* 040 sp 041 ! 042 " 043 # 044 $ 045 % 046 & 047 ' */ + C_BLANK,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT, +/* 050 ( 051 ) 052 * 053 + 054 , 055 - 056 . 057 / */ + C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT, +/* 060 0 061 1 062 2 063 3 064 4 065 5 066 6 067 7 */ + C_OCTAL,C_OCTAL,C_OCTAL,C_OCTAL,C_OCTAL,C_OCTAL,C_OCTAL,C_OCTAL, +/* 070 8 071 9 072 : 073 ; 074 < 075 = 076 > 077 ? 
*/ + C_DIGIT,C_DIGIT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT, +/* 100 @ 101 A 102 B 103 C 104 D 105 E 106 F 107 G */ + C_PUNCT,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER, +/* 110 H 111 I 112 J 113 K 114 L 115 M 116 N 117 O */ + C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER, +/* 120 P 121 Q 122 R 123 S 124 T 125 U 126 V 127 W */ + C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER, +/* 130 X 131 Y 132 Z 133 [ 134 \ 135 ] 136 ^ 137 _ */ + C_UPPER,C_UPPER,C_UPPER,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT, +/* 140 ` 141 a 142 b 143 c 144 d 145 e 146 f 147 g */ + C_PUNCT,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER, +/* 150 h 151 i 152 j 153 k 154 l 155 m 156 n 157 o */ + C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER, +/* 160 p 161 q 162 r 163 s 164 t 165 u 166 v 167 w */ + C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER, +/* 170 x 171 y 172 z 173 { 174 | 175 } 176 ~ 177 del */ + C_LOWER,C_LOWER,C_LOWER,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_CNTRL +}; diff --git a/libcommon/asciitype.h b/libcommon/asciitype.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. 
This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)asciitype.h 1.6 (gritter) 9/9/05 */ + +/* + * Locale-independent character classes. + */ +enum { + C_CNTRL = 0000, + C_BLANK = 0001, + C_WHITE = 0002, + C_SPACE = 0004, + C_PUNCT = 0010, + C_OCTAL = 0020, + C_DIGIT = 0040, + C_UPPER = 0100, + C_LOWER = 0200 +}; + +extern const unsigned char class_char[]; + +#define asciichar(c) ((unsigned)(c) <= 0177) +#define alnumchar(c) (asciichar(c)&&(class_char[c]&\ + (C_DIGIT|C_OCTAL|C_UPPER|C_LOWER))) +#define alphachar(c) (asciichar(c)&&(class_char[c]&(C_UPPER|C_LOWER))) +#define blankchar(c) (asciichar(c)&&(class_char[c]&(C_BLANK))) +#define cntrlchar(c) (asciichar(c)&&(class_char[c]==C_CNTRL)) /* C_CNTRL is 0: compare, do not mask */ +#define digitchar(c) (asciichar(c)&&(class_char[c]&(C_DIGIT|C_OCTAL))) +#define lowerchar(c) (asciichar(c)&&(class_char[c]&(C_LOWER))) +#define punctchar(c) (asciichar(c)&&(class_char[c]&(C_PUNCT))) +#define spacechar(c) (asciichar(c)&&(class_char[c]&(C_BLANK|C_SPACE|C_WHITE))) +#define upperchar(c) (asciichar(c)&&(class_char[c]&(C_UPPER))) +#define whitechar(c) (asciichar(c)&&(class_char[c]&(C_BLANK|C_WHITE))) +#define octalchar(c) (asciichar(c)&&(class_char[c]&(C_OCTAL))) +#define graphchar(c) (asciichar(c)&&(class_char[c]&\ + (C_UPPER|C_LOWER|C_DIGIT|C_OCTAL|C_PUNCT))) +#define printchar(c) ((c)==' ' || asciichar(c)&&(class_char[c]&\ + (C_UPPER|C_LOWER|C_DIGIT|C_OCTAL|C_PUNCT))) + +#define upperconv(c) (lowerchar(c) ? (c)-'a'+'A' : (c)) +#define lowerconv(c) (upperchar(c) ? 
(c)-'A'+'a' : (c)) diff --git a/libcommon/atoll.h b/libcommon/atoll.h @@ -0,0 +1,8 @@ +/* Sccsid @(#)atoll.h 1.4 (gritter) 7/18/04 */ + +#if defined (__hpux) || defined (_AIX) || \ + defined (__FreeBSD__) && (__FreeBSD__) < 5 +extern long long strtoll(const char *nptr, char **endptr, int base); +extern unsigned long long strtoull(const char *nptr, char **endptr, int base); +extern long long atoll(const char *nptr); +#endif /* __hpux || _AIX || __FreeBSD__ < 5 */ diff --git a/libcommon/blank.h b/libcommon/blank.h @@ -0,0 +1,38 @@ +/* + * isblank() and iswblank() are not available with many pre-XSH6 + * systems. Check whether isblank was defined, and assume it is + * not available if not. + */ +/* Sccsid @(#)blank.h 1.3 (gritter) 5/1/04 */ + +#ifndef __dietlibc__ +#ifndef LIBCOMMON_BLANK_H +#define LIBCOMMON_BLANK_H 1 + +#include <ctype.h> +#include <wctype.h> + +#ifndef isblank + +static +#ifdef __GNUC__ +__inline__ +#endif /* __GNUC__ */ +int +my_isblank(int c) +{ + return c == ' ' || c == '\t'; +} +#define isblank(c) my_isblank(c) + +static int +my_iswblank(wint_t c) +{ + return c == L' ' || c == L'\t'; +} +#undef iswblank +#define iswblank(c) my_iswblank(c) + +#endif /* !isblank */ +#endif /* !LIBCOMMON_BLANK_H */ +#endif /* !__dietlibc__ */ diff --git a/libcommon/depsinc.mk b/libcommon/depsinc.mk @@ -0,0 +1,2 @@ +DEPS_CFLAGS = $DEPS_CFLAGS -I$libcommon_DEPDIR +DEPS_LDFLAGS = $DEPS_LDFLAGS -L$libcommon_DEPDIR -lcommon diff --git a/libcommon/getdir.c b/libcommon/getdir.c @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)getdir.c 1.20 (gritter) 5/14/06 */ + +#ifndef __linux__ +/* + * 32-bit Solaris and Open UNIX do not have 64-bit getdents(); but + * having _FILE_OFFSET_BITS=64 will make it use a dirent64 struct + * on Open UNIX -> SEGV. + */ +#undef _FILE_OFFSET_BITS +#endif /* !__linux__ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> + +#if defined (__UCLIBC__) +#include <linux/types.h> +#include <linux/dirent.h> +#define getdents(a, b, c) __getdents64(a, b, c) +#define dirent dirent64 +extern int getdents(int, struct dirent *, size_t); +#elif defined (__GLIBC__) || defined (__FreeBSD__) || defined (_AIX) || \ + defined (__NetBSD__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) +#include <dirent.h> +#define getdents(a, b, c) getdirentries((a), (char *)(b), (c), &(db->g_offs)) +#if defined (__FreeBSD__) || defined (__NetBSD__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) +#undef d_ino +#endif /* __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __DragonFly__ + || __APPLE__ */ +#elif defined (__dietlibc__) +#include <dirent.h> +#include <unistd.h> +#else /* !__GLIBC__, !__dietlibc__ */ +#ifdef __hpux +#define _KERNEL +#endif /* __hpux */ +#include <dirent.h> +#ifdef __hpux +#ifndef _INO64_T +typedef unsigned long long uint64_t; +typedef uint64_t ino64_t; +#endif /* !_INO64_T */ +#ifdef __LP64__ +#define dirent __dirent64 +#else /* !__LP64__ */ +#define dirent __dirent32 
+#endif /* !__LP64__ */ +#define d_reclen __d_reclen +#define d_name __d_name +#define d_ino __d_ino +#endif /* __hpux */ +#endif /* !__GLIBC__, !__dietlibc__ */ + +#include "getdir.h" + +#define DIBSIZE 5120 + +struct getdb { +#if !defined (__FreeBSD__) && !defined (__NetBSD__) && !defined (__OpenBSD__) \ + && !defined (__DragonFly__) && !defined (__APPLE__) + off_t g_offs; +#else /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ + long g_offs; +#endif /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ + struct dirent *g_dirp; + const char *g_path; + struct direc g_dic; + union { + char g_dirbuf[DIBSIZE+1]; + struct dirent g_dummy[1]; + } g_u; + int g_num; + int g_fd; +}; + +struct getdb * +getdb_alloc(const char *path, int fd) +{ + struct getdb *db; + + if ((db = malloc(sizeof *db)) == NULL) + return NULL; + db->g_dirp = NULL; + db->g_offs = 0; + db->g_fd = fd; + db->g_path = path; + return db; +} + +void +getdb_free(struct getdb *db) +{ + free(db); +} + +struct direc * +getdir(struct getdb *db, int *err) +{ + int reclen; + + *err = 0; + while (db->g_dirp == NULL) + { + /*LINTED*/ + db->g_num = getdents(db->g_fd, + (struct dirent *)db->g_u.g_dirbuf, + DIBSIZE); + if (db->g_num <= 0) { + if (db->g_num < 0) + *err = errno; + db->g_offs = 0; + return NULL; + } + /*LINTED*/ + db->g_dirp = (struct dirent *)db->g_u.g_dirbuf; + while (db->g_dirp && +#if !defined (__FreeBSD__) && !defined (__NetBSD__) && !defined (__OpenBSD__) \ + && !defined (__DragonFly__) && !defined (__APPLE__) + db->g_dirp->d_ino == 0 +#else /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ + (db->g_dirp->d_fileno == 0 +#ifdef DT_WHT + || db->g_dirp->d_type == DT_WHT +#endif + ) +#endif /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ + ) + { + next: +#ifndef __DragonFly__ + reclen = db->g_dirp->d_reclen; +#else + reclen = _DIRENT_DIRSIZ(db->g_dirp); +#endif + if ((db->g_num -= reclen) == 0 || reclen == 0) + db->g_dirp = 
NULL; + else + db->g_dirp = + /*LINTED*/ + (struct dirent *)((char *)db->g_dirp + + reclen); + } + } +#if !defined (__FreeBSD__) && !defined (__NetBSD__) && !defined (__OpenBSD__) \ + && !defined (__DragonFly__) && !defined (__APPLE__) + if (db->g_dirp->d_ino == 0) + goto next; + db->g_dic.d_ino = db->g_dirp->d_ino; +#else /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ + if (db->g_dirp->d_fileno == 0 +#ifdef DT_WHT + || db->g_dirp->d_type == DT_WHT +#endif + ) + { + goto next; + } + db->g_dic.d_ino = db->g_dirp->d_fileno; +#endif /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ + db->g_dic.d_name = db->g_dirp->d_name; +#ifndef __DragonFly__ + reclen = db->g_dirp->d_reclen; +#else + reclen = _DIRENT_DIRSIZ(db->g_dirp); +#endif + if ((db->g_num -= reclen) == 0 || reclen == 0) + db->g_dirp = NULL; + else + /*LINTED*/ + db->g_dirp = (struct dirent *)((char *)db->g_dirp + reclen); + return &(db->g_dic); +} diff --git a/libcommon/getdir.h b/libcommon/getdir.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ +/* Sccsid @(#)getdir.h 1.4 (gritter) 10/19/03 */ + +#include <sys/types.h> + +struct direc { + unsigned long long d_ino; + char *d_name; +}; + +extern struct getdb *getdb_alloc(const char *, int); +extern void getdb_free(struct getdb *); +extern struct direc *getdir(struct getdb *, int *); diff --git a/libcommon/getopt.c b/libcommon/getopt.c @@ -0,0 +1,141 @@ +/* + * getopt() - command option parsing + * + * Gunnar Ritter, Freiburg i. Br., Germany, March 2002. + */ + +/* Sccsid @(#)getopt.c 1.6 (gritter) 12/16/07 */ + +#include <sys/types.h> +#include <alloca.h> +#include <string.h> +#include "msgselect.h" + +/* + * One should not think that re-implementing this is necessary, but + * + * - Some libcs print weird messages. + * + * - GNU libc getopt() is totally brain-damaged, as it requires special + * care _not_ to reorder parameters and can't be told to work correctly + * with ':' as first optstring character at all. + */ + +char *optarg = 0; +int optind = 1; +int opterr = 1; +int optopt = 0; +extern char *pfmt_label__; + +static void +error(const char *s, int c) +{ + /* + * Avoid including <unistd.h>, in case its getopt() declaration + * conflicts. 
+ */ + extern ssize_t write(int, const void *, size_t); + const char *msg = 0; + char *buf, *bp; + + if (pfmt_label__) + s = pfmt_label__; + switch (c) { + case '?': + msg = ": " msgselect("I","i") "llegal option -- "; + break; + case ':': + msg = ": " msgselect("O","o") "ption requires an argument -- "; + break; + } + bp = buf = alloca(strlen(s) + strlen(msg) + 2); + while (*s) + *bp++ = *s++; + while (*msg) + *bp++ = *msg++; + *bp++ = optopt; + *bp++ = '\n'; + write(2, buf, bp - buf); +} + +int +getopt(int argc, char *const argv[], const char *optstring) +{ + int colon; + static const char *lastp; + const char *curp; + + if (optstring[0] == ':') { + colon = 1; + optstring++; + } else + colon = 0; + if (lastp) { + curp = lastp; + lastp = 0; + } else { + if (optind >= argc || argv[optind] == 0 || + argv[optind][0] != '-' || + argv[optind][1] == '\0') + return -1; + if (argv[optind][1] == '-' && argv[optind][2] == '\0') { + optind++; + return -1; + } + curp = &argv[optind][1]; + } + optopt = curp[0] & 0377; + while (optstring[0]) { + if (optstring[0] == ':') { + optstring++; + continue; + } + if ((optstring[0] & 0377) == optopt) { + if (optstring[1] == ':') { + if (curp[1] != '\0') { + optarg = (char *)&curp[1]; + optind++; + } else { + if ((optind += 2) > argc) { + if (!colon && opterr) + error(argv[0], ':'); + return colon ? ':' : '?'; + } + optarg = argv[optind - 1]; + } + } else { + if (curp[1] != '\0') + lastp = &curp[1]; + else + optind++; + optarg = 0; + } + return optopt; + } + optstring++; + } + if (!colon && opterr) + error(argv[0], '?'); + if (curp[1] != '\0') + lastp = &curp[1]; + else + optind++; + optarg = 0; + return '?'; +} + +#ifdef __APPLE__ +/* + * Starting with Mac OS 10.5 Leopard, <unistd.h> turns getopt() + * into getopt$UNIX2003() by default. Consequently, this function + * is called instead of the one defined above. However, optind is + * still taken from this file, so in effect, options are not + * properly handled. 
Defining an own getopt$UNIX2003() function + * works around this issue. + */ +int +getopt$UNIX2003(int argc, char *const argv[], const char *optstring) +{ + return getopt(argc, argv, optstring); +} +#endif /* __APPLE__ */ diff --git a/libcommon/gmatch.c b/libcommon/gmatch.c @@ -0,0 +1,136 @@ +/* + * Derived from /usr/src/cmd/sh/expand.c, Unix 7th Edition: + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)gmatch.sl 1.5 (gritter) 5/29/05"; + +#include <stdlib.h> +#include <wchar.h> +#include <limits.h> + +#include "mbtowi.h" + +#define fetch(wc, s, n) ((mb_cur_max > 1 && *(s) & 0200 ? \ + ((n) = mbtowi(&(wc), (s), mb_cur_max), \ + (n) = ((n) > 0 ? (n) : (n) < 0 ? (wc = WEOF, 1) : 1)) :\ + ((wc) = *(s) & 0377, (n) = 1)), (s) += (n), (wc)) + +int +gmatch(const char *s, const char *p) +{ + const char *bs = s; + int mb_cur_max = MB_CUR_MAX; + wint_t c, scc; + int n; + + if (fetch(scc, s, n) == WEOF) + return (0); + switch (fetch(c, p, n)) { + + case '[': { + int ok = 0, excl; + unsigned long lc = ULONG_MAX; + const char *bp; + + if (*p == '!') { + p++; + excl = 1; + } else + excl = 0; + fetch(c, p, n); + bp = p; + while (c != '\0') { + if (c == ']' && p > bp) + return (ok ^ excl ? gmatch(s, p) : 0); + else if (c == '-' && p > bp && *p != ']') { + if (*p == '\\') + p++; + if (fetch(c, p, n) == '\0') + break; + if (lc <= scc && scc <= c) + ok = 1; + } else { + if (c == '\\') { + if (fetch(c, p, n) == '\0') + break; + } + if (scc == (lc = c)) + ok = 1; + } + fetch(c, p, n); + } + return (0); + } + + case '\\': + fetch(c, p, n); + if (c == '\0') + return (0); + /*FALLTHRU*/ + + default: + if (c != scc) + return (0); + /*FALLTHRU*/ + + case '?': + return (scc ? 
gmatch(s, p) : 0); + + case '*': + if (*p == '\0') + return (1); + s = bs; + while (*s) { + if (gmatch(s, p)) + return (1); + fetch(scc, s, n); + } + return (0); + + case '\0': + return (scc == '\0'); + + case WEOF: + return (0); + + } +} diff --git a/libcommon/ib_alloc.c b/libcommon/ib_alloc.c @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)ib_alloc.c 1.5 (gritter) 3/12/05 */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> +#include <malloc.h> + +#include "memalign.h" +#include "iblok.h" + +struct iblok * +ib_alloc(int fd, unsigned blksize) +{ + static long pagesize; + struct iblok *ip; + struct stat st; + + if (pagesize == 0) + if ((pagesize = sysconf(_SC_PAGESIZE)) < 0) + pagesize = 4096; + if (blksize == 0) { + if (fstat(fd, &st) < 0) + return NULL; + blksize = st.st_blksize > 0 ? 
st.st_blksize : 512; + } + if ((ip = calloc(1, sizeof *ip)) == NULL) + return NULL; + if ((ip->ib_blk = memalign(pagesize, blksize)) == NULL) { + free(ip); + return NULL; + } + ip->ib_blksize = blksize; + ip->ib_fd = fd; + ip->ib_mb_cur_max = MB_CUR_MAX; + return ip; +} diff --git a/libcommon/ib_close.c b/libcommon/ib_close.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)ib_close.c 1.2 (gritter) 4/17/03 */ + +#include <unistd.h> + +#include "iblok.h" + +int +ib_close(struct iblok *ip) +{ + int fd; + + fd = ip->ib_fd; + ib_free(ip); + return close(fd); +} diff --git a/libcommon/ib_free.c b/libcommon/ib_free.c @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)ib_free.c 1.2 (gritter) 4/17/03 */ + +#include <stdlib.h> + +#include "iblok.h" + +void +ib_free(struct iblok *ip) +{ + free(ip->ib_blk); + free(ip); +} diff --git a/libcommon/ib_getlin.c b/libcommon/ib_getlin.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ +/* Sccsid @(#)ib_getlin.c 1.2 (gritter) 4/17/03 */ + +#include <string.h> +#include <stdlib.h> +#include "iblok.h" + +size_t +ib_getlin(struct iblok *ip, char **line, size_t *alcd, + void *(*reallc)(void *, size_t)) +{ + char *nl; + size_t sz, llen = 0, nllen; + + for (;;) { + if (ip->ib_cur >= ip->ib_end) { + if (ip->ib_incompl) { + ip->ib_incompl = 0; + return 0; + } + if (ib_read(ip) == EOF) { + if (llen) { + ip->ib_incompl++; + (*line)[llen] = '\0'; + return llen; + } else + return 0; + } + /* + * ib_read() advances ib_cur since *ib_cur++ gives + * better performance than *++ib_cur for ib_get(). + * Go back again. + */ + ip->ib_cur--; + } + sz = ip->ib_end - ip->ib_cur; + if ((nl = memchr(ip->ib_cur, '\n', sz)) != NULL) { + sz = nl - ip->ib_cur + 1; + if ((nllen = llen + sz + 1) > *alcd) { + *line = reallc(*line, nllen); + *alcd = nllen; + } + memcpy(&(*line)[llen], ip->ib_cur, sz); + (*line)[llen + sz] = '\0'; + ip->ib_cur = nl + 1; + return llen + sz; + } + if ((nllen = llen + sz + 1) > *alcd) { + *line = reallc(*line, nllen); + *alcd = nllen; + } + memcpy(&(*line)[llen], ip->ib_cur, sz); + llen += sz; + ip->ib_cur = ip->ib_end; + } + /*NOTREACHED*/ + return 0; +} diff --git a/libcommon/ib_getw.c b/libcommon/ib_getw.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. 
/*
 * Read one (possibly multibyte) character from ip.
 *
 * The bytes are assembled in ip->ib_mbuf. *wc receives the decoded
 * character, or WEOF if mbtowi() rejects the byte sequence; *len receives
 * the number of bytes that make up the character. The return value is
 * ip->ib_mbuf, or NULL (with *wc set to WEOF) when no more input is
 * available.
 */
char *
ib_getw(struct iblok *ip, wint_t *wc, int *len)
{
	size_t	rest;
	int	c, i, n;

	i = 0;
	/* bytes fetched by the previous call but not consumed by it */
	rest = ip->ib_mend - ip->ib_mcur;
	if (rest && ip->ib_mcur > ip->ib_mbuf) {
		/* move the leftover bytes to the start of ib_mbuf */
		do
			ip->ib_mbuf[i] = ip->ib_mcur[i];
		while (i++, --rest);
	} else if (ip->ib_incompl) {
		/* input ended inside a sequence on the previous call */
		ip->ib_incompl = 0;
		*wc = WEOF;
		ip->ib_mend = ip->ib_mcur = NULL;
		return NULL;
	}
	if (i == 0) {
		/* nothing carried over: fetch the first byte */
		c = ib_get(ip);
		if (c == EOF) {
			*wc = WEOF;
			ip->ib_mend = ip->ib_mcur = NULL;
			return NULL;
		}
		ip->ib_mbuf[i++] = (char)c;
	}
	if (ip->ib_mbuf[0] & 0200) {
		/*
		 * High bit set: collect more bytes until a newline was
		 * stored, ib_mb_cur_max bytes are present, or input ends.
		 */
		while (ip->ib_mbuf[i-1] != '\n' && i < ip->ib_mb_cur_max &&
				ip->ib_incompl == 0) {
			c = ib_get(ip);
			if (c != EOF)
				ip->ib_mbuf[i++] = (char)c;
			else
				ip->ib_incompl = 1;
		}
		n = mbtowi(wc, ip->ib_mbuf, i);
		if (n < 0) {
			/* invalid sequence: consume a single byte */
			*len = 1;
			*wc = WEOF;
		} else if (n == 0) {
			*len = 1;
			*wc = '\0';
		} else
			*len = n;
	} else {
		/* high bit clear: the byte is the character */
		*wc = ip->ib_mbuf[0];
		*len = n = 1;
	}
	/* whatever follows the consumed character is kept for the next call */
	ip->ib_mcur = &ip->ib_mbuf[*len];
	ip->ib_mend = &ip->ib_mcur[i - *len];
	return ip->ib_mbuf;
}
/*
 * Open the file name for reading and attach an input buffer to it as
 * ib_alloc() does. Returns NULL on failure; if the buffer allocation
 * failed, the descriptor is closed again and errno is left as set by
 * ib_alloc().
 */
struct iblok *
ib_open(const char *name, unsigned blksize)
{
	struct iblok *ip;
	int fd;

	fd = open(name, O_RDONLY);
	if (fd < 0)
		return NULL;
	ip = ib_alloc(fd, blksize);
	if (ip == NULL) {
		/* close() may change errno; keep the allocation error */
		int saved = errno;

		close(fd);
		errno = saved;
		return NULL;
	}
	return ip;
}
+ */ +/* Sccsid @(#)ib_popen.c 1.2 (gritter) 4/17/03 */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> +#include <signal.h> + +#include "iblok.h" + +struct iblok * +ib_popen(const char *cmd, unsigned blksize) +{ + struct iblok *ip; + int fd[2], err; + pid_t pid; + char *shell; + + if (pipe(fd) < 0) + return NULL; + switch (pid = fork()) { + case -1: + return NULL; + case 0: + close(fd[0]); + dup2(fd[1], 1); + close(fd[1]); + if ((shell = getenv("SHELL")) == NULL) + shell = "/bin/sh"; + execl(shell, shell, "-c", cmd, NULL); + _exit(0177); + /*NOTREACHED*/ + } + close(fd[1]); + if ((ip = ib_alloc(fd[0], blksize)) == NULL) { + err = errno; + close(fd[0]); + errno = err; + } + ip->ib_pid = pid; + return ip; +} + +int +ib_pclose(struct iblok *ip) +{ + struct sigaction oldhup, oldint, oldquit, act; + int status; + + close(ip->ib_fd); + act.sa_handler = SIG_IGN; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + sigaction(SIGHUP, &act, &oldhup); + sigaction(SIGINT, &act, &oldint); + sigaction(SIGQUIT, &act, &oldquit); + while (waitpid(ip->ib_pid, &status, 0) < 0 && errno == EINTR); + sigaction(SIGHUP, &oldhup, NULL); + sigaction(SIGINT, &oldint, NULL); + sigaction(SIGQUIT, &oldquit, NULL); + return status; +} diff --git a/libcommon/ib_read.c b/libcommon/ib_read.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)ib_read.c 1.2 (gritter) 4/17/03 */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +#include "iblok.h" + +int +ib_read(struct iblok *ip) +{ + ssize_t sz; + + do { + if ((sz = read(ip->ib_fd, ip->ib_blk, ip->ib_blksize)) > 0) { + ip->ib_endoff += sz; + ip->ib_cur = ip->ib_blk; + ip->ib_end = &ip->ib_blk[sz]; + return *ip->ib_cur++ & 0377; + } + } while (sz < 0 && errno == EINTR); + if (sz < 0) + ip->ib_errno = errno; + ip->ib_cur = ip->ib_end = NULL; + return EOF; +} diff --git a/libcommon/ib_seek.c b/libcommon/ib_seek.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ +/* Sccsid @(#)ib_seek.c 1.4 (gritter) 5/8/03 */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +#include "iblok.h" + +off_t +ib_seek(struct iblok *ip, off_t off, int whence) +{ + if (whence == SEEK_CUR) { + off = ip->ib_endoff - (ip->ib_end - ip->ib_cur); + whence = SEEK_SET; + } + if (ip->ib_seekable && whence == SEEK_SET && ip->ib_cur && ip->ib_end && + off < ip->ib_endoff && + off >= ip->ib_endoff - (ip->ib_end - ip->ib_blk)) { + ip->ib_cur = ip->ib_end - (ip->ib_endoff - off); + return off; + } + if ((off = lseek(ip->ib_fd, off, whence)) == (off_t)-1) + return -1; + ip->ib_cur = ip->ib_end = NULL; + ip->ib_endoff = off; + ip->ib_seekable = 1; + return off; +} diff --git a/libcommon/iblok.h b/libcommon/iblok.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)iblok.h 1.5 (gritter) 7/16/04 */ + +/* + * Functions to read a file sequentially. 
+ */ + +#include <sys/types.h> /* for off_t, pid_t */ +#include <stdio.h> /* for EOF */ +#include <wchar.h> /* for wchar_t */ +#include <limits.h> /* for MB_LEN_MAX */ + +struct iblok { + long long ib_endoff; /* offset of endc from start of file */ + char ib_mbuf[MB_LEN_MAX+1]; /* multibyte overflow buffer */ + char *ib_mcur; /* next byte to read in ib_mbuf */ + char *ib_mend; /* one beyond last byte in ib_mbuf */ + char *ib_blk; /* buffered data */ + char *ib_cur; /* next character in ib_blk */ + char *ib_end; /* one beyond last byte in ib_blk */ + int ib_fd; /* input file descriptor */ + int ib_errno; /* errno on error, or 0 */ + int ib_incompl; /* had an incomplete last line */ + int ib_mb_cur_max; /* MB_CUR_MAX at time of ib_alloc() */ + int ib_seekable; /* had a successful lseek() */ + pid_t ib_pid; /* child from ib_popen() */ + unsigned ib_blksize; /* buffer size */ +}; + +/* + * Allocate an input buffer with file descriptor fd. blksize may be + * either the size of a buffer to allocate in ib_blk, or 0 if the + * size is determined automatically. On error, NULL is returned and + * errno indicates the offending error. + */ +extern struct iblok *ib_alloc(int fd, unsigned blksize); + +/* + * Deallocate the passed input buffer. The file descriptor is not + * closed. + */ +extern void ib_free(struct iblok *ip); + +/* + * Open file name and do ib_alloc() on the descriptor. + */ +extern struct iblok *ib_open(const char *name, unsigned blksize); + +/* + * Close the file descriptor in ip and do ib_free(). Return value is + * the result of close(). + */ +extern int ib_close(struct iblok *ip); + +/* + * A workalike of popen(cmd, "r") using iblok facilities. + */ +extern struct iblok *ib_popen(const char *cmd, unsigned blksize); + +/* + * Close an iblok opened with ib_popen(). + */ +extern int ib_pclose(struct iblok *ip); + +/* + * Read new input buffer. Returns the next character (or EOF) and advances + * ib_cur by one above the bottom of the buffer. 
+ */ +extern int ib_read(struct iblok *ip); + +/* + * Get next character. Return EOF at end-of-file or read error. + */ +#define ib_get(ip) ((ip)->ib_cur < (ip)->ib_end ? *(ip)->ib_cur++ & 0377 :\ + ib_read(ip)) + +/* + * Unget a character. Note that this implementation alters the read buffer. + * Caution: Calling this macro more than once might underflow ib_blk. + */ +#define ib_unget(c, ip) (*(--(ip)->ib_cur) = (char)(c)) + +/* + * Get file offset of last read character. + */ +#define ib_offs(ip) ((ip)->ib_endoff - ((ip)->ib_end - (ip)->ib_cur - 1)) + +/* + * Read a wide character using ib_get() facilities. *wc is used to store + * the wide character, or WEOF if an invalid byte sequence was found. + * The number of bytes consumed is stored in *len. Return value is the + * corresponding byte sequence, or NULL at end-of-file in input. + * + * Note that it is not possible to mix calls to ib_getw() with calls to + * ib_get(), ib_unget() or ib_seek() unless the last character read by + * ib_getw() was L'\n'. + */ +extern char *ib_getw(struct iblok *ip, wint_t *wc, int *len); + +/* + * Get a line from ip, returning the line length. Further arguments are either + * the pointer to a malloc()ed buffer and a pointer to its size, or (NULL, 0) + * if ib_getlin() shall allocate the buffer itselves. ib_getlin() will use + * the realloc-style function reallc() to increase the buffer if necessary; + * this function is expected never to fail (i. e., it must longjmp() or abort + * if it cannot allocate a buffer of the demanded size). + * On end-of-file or error, 0 is returned. + */ +extern size_t ib_getlin(struct iblok *ip, char **line, size_t *alcd, + void *(*reallc)(void *, size_t)); + +/* + * Like lseek(). 
#ifndef	LIBCOMMON_MBTOWI_H
#define	LIBCOMMON_MBTOWI_H

/*
 * mbtowc() with a wint_t result: convert at most n bytes at s and store
 * the character in *pwi. The return value is that of mbtowc().
 *
 * If the conversion fails (return value < 0), *pwi is set to 0 rather
 * than to an indeterminate value; callers are still expected to check
 * the return value.
 */
static
#if defined (__GNUC__) || defined (__USLC__) || defined (__INTEL_COMPILER) || \
	defined (__IBMC__) || defined (__SUNPRO_C)
	inline
#endif
	int
mbtowi(wint_t *pwi, const char *s, size_t n)
{
	/* initialized because mbtowc() does not store anything on error */
	wchar_t	wc = L'\0';
	int	i;

	i = mbtowc(&wc, s, n);
	*pwi = wc;
	return i;
}

#endif	/* !LIBCOMMON_MBTOWI_H */
/*
 * Poor man's memalign() for systems whose malloc() page-aligns any
 * request of at least one page. Only page alignment is supported: any
 * other alignment yields NULL. Requests smaller than a page are rounded
 * up to one page so that the alignment promise of malloc() applies.
 *
 * If the page size cannot be determined, 4096 is assumed, which is the
 * same fallback ib_alloc() and ob_alloc() use for their alignment
 * argument.
 */
void *
memalign(size_t alignment, size_t size)
{
	static long	pagesize;

	if (pagesize == 0) {
		pagesize = sysconf(_SC_PAGESIZE);
		/* a cached -1 would make every later call fail */
		if (pagesize <= 0)
			pagesize = 4096;
	}
	if (alignment != (size_t)pagesize)
		return NULL;
	if (size < (size_t)pagesize)
		size = pagesize;
	return malloc(size);
}
+ */ +/* Sccsid @(#)memalign.h 1.7 (gritter) 1/22/06 */ + +#ifndef LIBCOMMON_MEMALIGN_H +#define LIBCOMMON_MEMALIGN_H + +#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (_AIX) || \ + defined (__NetBSD__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) +#include <stdlib.h> + +extern void *memalign(size_t, size_t); +#endif /* __FreeBSD__ || __dietlibc__ || _AIX || __NetBSD__ || __OpenBSD__ || + __DragonFly__ || __APPLE__ */ +#endif /* !LIBCOMMON_MEMALIGN_H */ diff --git a/libcommon/mkfile b/libcommon/mkfile @@ -0,0 +1,61 @@ +LIB = libcommon.a +LOBJ = asciitype.o ib_alloc.o ib_close.o ib_free.o ib_getlin.o ib_getw.o \ + ib_open.o ib_popen.o ib_read.o ib_seek.o oblok.o sfile.o strtol.o \ + getdir.o regexpr.o gmatch.o utmpx.o memalign.o pathconf.o \ + sigset.o signal.o sigrelse.o sighold.o sigignore.o sigpause.o \ + getopt.o pfmt.o vpfmt.o setlabel.o setuxlabel.o pfmt_label.o sysv3.o +TARG = CHECK +CLEAN_FILES = alloca.h malloc.h utmpx.h + +<$mkbuild/mk.common + +libcommon.a:Q: headers $LOBJ + echo AR $target + $AR -rv $target $LOBJ + echo RANLIB $target + $RANLIB $target + +CHECK:Q: CHECK.c + echo CC CHECK + $CC $CFLAGS $CPPFLAGS -E CHECK.c >CHECK + +headers:Q: CHECK + one() { + echo "" + rm -f "$1.h" + if grep "$1_h[ ]*=[ ]*[^0][ ]*;" CHECK >/dev/null; + then + ln -s "_$1.h" "$1.h" + fi + } + one alloca + one malloc + one utmpx + +asciitype.o: asciitype.h +ib_alloc.o: iblok.h +ib_close.o: iblok.h +ib_free.o: iblok.h +ib_getlin.o: iblok.h +ib_getw.o: iblok.h +ib_open.o: iblok.h +ib_read.o: iblok.h +ib_seek.o: iblok.h +iblok.o: iblok.h +oblok.o: oblok.h +sfile.o: sfile.h +getdir.o: getdir.h +regexpr.o: regexpr.h regexp.h +pfmt.o: pfmt.h +vpfmt.o: pfmt.h +setlabel.o: pfmt.h +setuxlabel.o: pfmt.h msgselect.h +getopt.o: msgselect.h +sighold.o: sigset.h +sigignore.o: sigset.h +sigpause.o: sigset.h +sigrelse.o: sigset.h +sigset.o: sigset.h +signal.o: sigset.h +pathconf.o: pathconf.h + diff --git a/libcommon/msgselect.h 
b/libcommon/msgselect.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)msgselect.h 1.2 (gritter) 9/21/03 */ + +#define MSG_LEVEL 0 + +#if MSG_LEVEL == 1 +#define msgselect(a, b) a +#else +#define msgselect(a, b) b +#endif diff --git a/libcommon/oblok.c b/libcommon/oblok.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. 
Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)oblok.c 1.7 (gritter) 7/16/04 */ + +#include <sys/types.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <malloc.h> + +#include "memalign.h" +#include "oblok.h" + +struct list { + struct list *l_nxt; + struct oblok *l_op; +}; + +static struct list *bloks; +static int exitset; + +int +ob_clear(void) +{ + struct list *lp; + int val = 0; + + for (lp = bloks; lp; lp = lp->l_nxt) { + if (ob_flush(lp->l_op) < 0) + val = -1; + else if (val >= 0) + val++; + } + return val; +} + +static void +add(struct oblok *op) +{ + struct list *lp, *lq; + + if ((lp = calloc(1, sizeof *lp)) != NULL) { + lp->l_nxt = NULL; + lp->l_op = op; + if (bloks) { + for (lq = bloks; lq->l_nxt; lq = lq->l_nxt); + lq->l_nxt = lp; + } else + bloks = lp; + if (exitset == 0) { + exitset = 1; + atexit((void (*)(void))ob_clear); + } + } +} + +static void +del(struct oblok *op) +{ + struct list *lp, *lq = NULL; + + if (bloks) { + for (lp = bloks; lp && lp->l_op != op; lp = lp->l_nxt) + lq = lp; + if (lp) { + if (lq) + lq->l_nxt = lp->l_nxt; + if (lp == bloks) + bloks = bloks->l_nxt; + free(lp); + } + } +} + +struct oblok * +ob_alloc(int fd, enum ob_mode bf) +{ + static long pagesize; + struct oblok *op; + + if (pagesize == 0) + if ((pagesize = sysconf(_SC_PAGESIZE)) < 0) + pagesize = 4096; + if ((op = memalign(pagesize, sizeof *op)) == NULL) + return NULL; + memset(op, 0, sizeof *op); + op->ob_fd = fd; + switch (bf) { + case OB_EBF: + op->ob_bf = isatty(fd) ? 
OB_LBF : OB_FBF; + break; + default: + op->ob_bf = bf; + } + add(op); + return op; +} + +ssize_t +ob_free(struct oblok *op) +{ + ssize_t wrt; + + wrt = ob_flush(op); + del(op); + free(op); + return wrt; +} + +static ssize_t +swrite(int fd, const char *data, size_t sz) +{ + ssize_t wo, wt = 0; + + do { + if ((wo = write(fd, data + wt, sz - wt)) < 0) { + if (errno == EINTR) + continue; + else + return wt; + } + wt += wo; + } while (wt < sz); + return sz; +} + +ssize_t +ob_write(struct oblok *op, const char *data, size_t sz) +{ + ssize_t wrt; + size_t di, isz; + + switch (op->ob_bf) { + case OB_NBF: + wrt = swrite(op->ob_fd, data, sz); + op->ob_wrt += wrt; + if (wrt != sz) { + op->ob_bf = OB_EBF; + writerr(op, sz, wrt>0?wrt:0); + return -1; + } + return wrt; + case OB_LBF: + case OB_FBF: + isz = sz; + while (op->ob_pos + sz > (OBLOK)) { + di = (OBLOK) - op->ob_pos; + sz -= di; + if (op->ob_pos > 0) { + memcpy(&op->ob_blk[op->ob_pos], data, di); + wrt = swrite(op->ob_fd, op->ob_blk, (OBLOK)); + } else + wrt = swrite(op->ob_fd, data, (OBLOK)); + op->ob_wrt += wrt; + if (wrt != (OBLOK)) { + op->ob_bf = OB_EBF; + writerr(op, (OBLOK), wrt>0?wrt:0); + return -1; + } + data += di; + op->ob_pos = 0; + } + if (op->ob_bf == OB_LBF) { + const char *cp; + + cp = data; + while (cp < &data[sz]) { + if (*cp == '\n') { + di = cp - data + 1; + sz -= di; + if (op->ob_pos > 0) { + memcpy(&op->ob_blk[op->ob_pos], + data, di); + wrt = swrite(op->ob_fd, + op->ob_blk, + op->ob_pos + di); + } else + wrt = swrite(op->ob_fd, + data, di); + op->ob_wrt += wrt; + if (wrt != op->ob_pos + di) { + op->ob_bf = OB_EBF; + writerr(op, di, wrt>0?wrt:0); + return -1; + } + op->ob_pos = 0; + data += di; + cp = data; + } + cp++; + } + } + if (sz == (OBLOK)) { + wrt = swrite(op->ob_fd, data, sz); + op->ob_wrt += wrt; + if (wrt != sz) { + op->ob_bf = OB_EBF; + writerr(op, sz, wrt>0?wrt:0); + return -1; + } + } else if (sz) { + memcpy(&op->ob_blk[op->ob_pos], data, sz); + op->ob_pos += sz; + } + return isz; + 
case OB_EBF: + ; + } + return -1; +} + +ssize_t +ob_flush(struct oblok *op) +{ + ssize_t wrt = 0; + + if (op->ob_pos) { + wrt = swrite(op->ob_fd, op->ob_blk, op->ob_pos); + op->ob_wrt += wrt; + if (wrt != op->ob_pos) { + op->ob_bf = OB_EBF; + writerr(op, op->ob_pos, wrt>0?wrt:0); + wrt = -1; + } + op->ob_pos = 0; + } + return wrt; +} + +int +ob_chr(int c, struct oblok *op) +{ + char b; + ssize_t wrt; + + b = (char)c; + wrt = ob_write(op, &b, 1); + return wrt < 0 ? EOF : c; +} diff --git a/libcommon/oblok.h b/libcommon/oblok.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ +/* Sccsid @(#)oblok.h 1.3 (gritter) 4/17/03 */ + +#include <sys/types.h> + +#ifndef OBLOK +enum { + OBLOK = 4096 +}; +#endif /* !OBLOK */ + +enum ob_mode { + OB_EBF = 0, /* error or mode unset */ + OB_NBF = 1, /* not buffered */ + OB_LBF = 2, /* line buffered */ + OB_FBF = 3 /* fully buffered */ +}; + +struct oblok { + char ob_blk[OBLOK]; /* buffered data */ + long long ob_wrt; /* amount of data written */ + int ob_pos; /* position of first empty date byte */ + int ob_fd; /* file descriptor to write to */ + enum ob_mode ob_bf; /* buffering mode */ +}; + +/* + * Allocate an output buffer with file descriptor fd and buffer mode bf. + * If bf is OB_EBF, the choice is made dependant upon the file type. + * NULL is returned if no memory is available. + */ +extern struct oblok *ob_alloc(int fd, enum ob_mode bf); + +/* + * Deallocate the passed output buffer, flushing all data. The file + * descriptor is not closed. Returns -1 if flushing fails. + */ +extern ssize_t ob_free(struct oblok *op); + +/* + * Write data of length sz to the passed output buffer. Returns -1 on + * error or the amount of data written. + */ +extern ssize_t ob_write(struct oblok *op, const char *data, size_t sz); + +/* + * Flush all data in the passed output buffer. Returns -1 on error or + * the amount of data written; 0 is success and means 'nothing to flush'. + * The underlying device is not flushed (i. e. no fsync() is performed). + */ +extern ssize_t ob_flush(struct oblok *op); + +/* + * Flush all output buffers. Called automatically using atexit(). Returns + * -1 on error or the number of buffers flushed; 0 is success. + */ +extern int ob_clear(void); + +/* + * putc() workalike. + */ +#define ob_put(c, op) ((op)->ob_bf != OB_FBF || (op)->ob_pos >= (OBLOK) - 1 ?\ + ob_chr((c), (op)) : \ + (int)((op)->ob_blk[(op)->ob_pos++] = (char)(c))) + + +/* + * fputc() workalike. 
/*	Sccsid @(#)pathconf.c	1.2 (gritter) 5/1/04	*/

/*
 * Minimal pathconf()/fpathconf() replacements, compiled only when
 * building against dietlibc.
 */
#ifdef	__dietlibc__
#include <unistd.h>
#include "pathconf.h"

/*
 * Map a _PC_* name to a fixed value: 1024 for _PC_PATH_MAX, 0 for
 * _PC_VDISABLE, and -1 for every other name.
 */
static long
pc(int name)
{
	if (name == _PC_PATH_MAX)
		return 1024;
	if (name == _PC_VDISABLE)
		return 0;
	return -1;
}

/*
 * The file descriptor is ignored; only name selects the result.
 */
long
fpathconf(int fildes, int name)
{
	long	value = pc(name);

	return value;
}

/*
 * The path is ignored; only name selects the result.
 */
long
pathconf(const char *path, int name)
{
	long	value = pc(name);

	return value;
}
#endif	/* __dietlibc__ */
/*	Sccsid @(#)pathconf.h	1.2 (gritter) 5/1/04	*/

/*
 * Prototypes for the pathconf()/fpathconf() replacements in pathconf.c.
 * Like the definitions, they are only visible when building against
 * dietlibc.
 */
#ifdef	__dietlibc__
#include <unistd.h>

/* Arguments: file descriptor, _PC_* name. */
extern long	fpathconf(int, int);
/* Arguments: path name, _PC_* name. */
extern long	pathconf(const char *, int);
#endif	/* __dietlibc__ */
/*
 * Variadic front end to vpfmt(): collect the arguments following fmt
 * into a va_list, hand everything to vpfmt() and return its result.
 */
int
pfmt(FILE *stream, long flags, const char *fmt, ...)
{
	va_list	args;
	int	result;

	va_start(args, fmt);
	result = vpfmt(stream, flags, fmt, args);
	va_end(args);

	return result;
}
/*	Sccsid @(#)pfmt.h	1.2 (gritter) 9/21/03	*/

#include <stdio.h>

/*
 * Formatted message output; variadic wrapper around vpfmt().
 */
extern int	pfmt(FILE *stream, long flags, const char *format, ...);

#include <stdarg.h>

/*
 * Same as pfmt(), but takes the arguments as a va_list.
 */
extern int	vpfmt(FILE *stream, long flags, const char *format, va_list ap);

/*
 * Values for the flags argument. MM_ERROR, MM_STD and MM_GET are all
 * zero, i. e. they are what is in effect when no other bit is set.
 * NOTE(review): presumably these carry the System V pfmt(3C) meanings
 * (severity, standard format, catalog lookup); confirm against vpfmt().
 */
#define	MM_HALT		0x00000001
#define	MM_ERROR	0x00000000
#define	MM_WARNING	0x00000002
#define	MM_INFO		0x00000004
#define	MM_ACTION	0x00000100
#define	MM_NOSTD	0x00000200
#define	MM_STD		0x00000000
#define	MM_NOGET	0x00000400
#define	MM_GET		0x00000000

extern int	setlabel(const char *label);
extern int	setuxlabel(const char *label);

/*
 * No message catalogs here: setcat() yields its argument and gettxt()
 * yields its second argument (the default string) unchanged.
 */
#define	setcat(s)	(s)
#define	gettxt(n, s)	(s)
/*
 * REGEXP_H_USED marks the identification string below so that GCC
 * neither discards it nor warns about it; it is empty elsewhere.
 */
#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4
#define	REGEXP_H_USED	__attribute__ ((used))
#elif defined __GNUC__
#define	REGEXP_H_USED	__attribute__ ((unused))
#else
#define	REGEXP_H_USED
#endif
static const char regexp_h_sccsid[] REGEXP_H_USED =
	"@(#)regexp.sl 1.56 (gritter) 5/29/05";

/* Multibyte support is enabled unless built for vi or with dietlibc. */
#if !defined (REGEXP_H_USED_FROM_VI) && !defined (__dietlibc__)
#define	REGEXP_H_WCHARS
#endif

/*
 * Opcodes of the compiled expression.
 */
#define	CBRA	2
#define	CCHR	4
#define	CDOT	8
#define	CCL	12
/*	CLNUM	14	used in sed */
/*	CEND	16	used in sed */
#define	CDOL	20
#define	CCEOF	22
#define	CKET	24
#define	CBACK	36
#define	CNCL	40
#define	CBRC	44
#define	CLET	48
#define	CCH1	52
#define	CCH2	56
#define	CCH3	60

/*
 * Modifier bits that are or'ed into an opcode.
 */
#define	STAR	01
#define	RNGE	03
#define	REGEXP_H_LEAST	0100

/* CMB marks the multibyte variant of an opcode; 0 without wchar support. */
#ifdef	REGEXP_H_WCHARS
#define	CMB	0200
#else	/* !REGEXP_H_WCHARS */
#define	CMB	0
#endif	/* !REGEXP_H_WCHARS */

#define	NBRA	9

/*
 * Set or test the bit for character c in the bitmap at ep (a variable
 * of that name must be in scope, as must bittab[]). The argument is
 * parenthesized so that an expression such as "x | 1" indexes the
 * bitmap with its full value.
 */
#define	PLACE(c)	(ep[(c) >> 3] |= bittab[(c) & 07])
#define	ISTHERE(c)	(ep[(c) >> 3] & bittab[(c) & 07])

#ifdef	REGEXP_H_WCHARS
/* Like ISTHERE(), but with the bitmap passed explicitly. */
#define	REGEXP_H_IS_THERE(ep, c)	((ep)[(c) >> 3] & bittab[(c) & 07])
#endif

#include <ctype.h>
#include <string.h>
#include <limits.h>
#ifdef	REGEXP_H_WCHARS
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
#endif	/* REGEXP_H_WCHARS */

/* Letter or underscore, for the \< and \> word boundary tests. */
#define	regexp_h_uletter(c)	(isalpha(c) || (c) == '_')
#ifdef	REGEXP_H_WCHARS
#define	regexp_h_wuletter(c)	(iswalpha(c) || (c) == L'_')

/*
 * Used to allocate memory for the multibyte star algorithm.
 */
#ifndef	regexp_h_malloc
#define	regexp_h_malloc(n)	malloc(n)
#endif
#ifndef	regexp_h_free
#define	regexp_h_free(p)	free(p)
#endif

/*
 * Can be predefined to 'inline' to inline some multibyte functions;
 * may improve performance for files that contain many multibyte
 * sequences.
 */
#ifndef	regexp_h_inline
#define	regexp_h_inline
#endif

/*
 * Mask to determine whether the first byte of a sequence possibly
 * starts a multibyte character. Set to 0377 to force mbtowc() for
 * any byte sequence (except 0).
 */
#ifndef	REGEXP_H_MASK
#define	REGEXP_H_MASK	0200
#endif
#endif	/* REGEXP_H_WCHARS */
+ */ +#ifndef regexp_h_static +#define regexp_h_static +#endif +#ifndef REGEXP_H_STEP_INIT +#define REGEXP_H_STEP_INIT +#endif +#ifndef REGEXP_H_ADVANCE_INIT +#define REGEXP_H_ADVANCE_INIT +#endif + +char *braslist[NBRA]; +char *braelist[NBRA]; +int nbra; +char *loc1, *loc2, *locs; +int sed; +int nodelim; + +regexp_h_static int circf; +regexp_h_static int low; +regexp_h_static int size; + +regexp_h_static unsigned char bittab[] = { + 1, + 2, + 4, + 8, + 16, + 32, + 64, + 128 +}; +static int regexp_h_advance(register const char *lp, + register const char *ep); +static void regexp_h_getrnge(register const char *str, int least); + +static const char *regexp_h_bol; /* beginning of input line (for \<) */ + +#ifdef REGEXP_H_WCHARS +static int regexp_h_wchars; +static int regexp_h_mbcurmax; + +static const char *regexp_h_firstwc; /* location of first + multibyte character + on input line */ + +#define regexp_h_getwc(c) { \ + if (regexp_h_wchars) { \ + char mbbuf[MB_LEN_MAX + 1], *mbptr; \ + wchar_t wcbuf; \ + int mb, len; \ + mbptr = mbbuf; \ + do { \ + mb = GETC(); \ + *mbptr++ = mb; \ + *mbptr = '\0'; \ + } while ((len = mbtowc(&wcbuf, mbbuf, regexp_h_mbcurmax)) < 0 \ + && mb != eof && mbptr < mbbuf + MB_LEN_MAX); \ + if (len == -1) \ + ERROR(67); \ + c = wcbuf; \ + } else { \ + c = GETC(); \ + } \ +} + +#define regexp_h_store(wc, mb, me) { \ + int len; \ + if (wc == WEOF) \ + ERROR(67); \ + if ((len = me - mb) <= regexp_h_mbcurmax) { \ + char mt[MB_LEN_MAX]; \ + if (wctomb(mt, wc) >= len) \ + ERROR(50); \ + } \ + switch (len = wctomb(mb, wc)) { \ + case -1: \ + ERROR(67); \ + case 0: \ + mb++; \ + break; \ + default: \ + mb += len; \ + } \ +} + +static regexp_h_inline wint_t +regexp_h_fetchwc(const char **mb, int islp) +{ + wchar_t wc; + int len; + + if ((len = mbtowc(&wc, *mb, regexp_h_mbcurmax)) < 0) { + (*mb)++; + return WEOF; + } + if (islp && regexp_h_firstwc == NULL) + regexp_h_firstwc = *mb; + /*if (len == 0) { + (*mb)++; + return L'\0'; + } handled in 
singlebyte code */ + *mb += len; + return wc; +} + +#define regexp_h_fetch(mb, islp) ((*(mb) & REGEXP_H_MASK) == 0 ? \ + (*(mb)++&0377): \ + regexp_h_fetchwc(&(mb), islp)) + +static regexp_h_inline wint_t +regexp_h_showwc(const char *mb) +{ + wchar_t wc; + + if (mbtowc(&wc, mb, regexp_h_mbcurmax) < 0) + return WEOF; + return wc; +} + +#define regexp_h_show(mb) ((*(mb) & REGEXP_H_MASK) == 0 ? (*(mb)&0377): \ + regexp_h_showwc(mb)) + +/* + * Return the character immediately preceding mb. Since no byte is + * required to be the first byte of a character, the longest multibyte + * character ending at &[mb-1] is searched. + */ +static regexp_h_inline wint_t +regexp_h_previous(const char *mb) +{ + const char *p = mb; + wchar_t wc, lastwc = WEOF; + int len, max = 0; + + if (regexp_h_firstwc == NULL || mb <= regexp_h_firstwc) + return (mb > regexp_h_bol ? (mb[-1] & 0377) : WEOF); + while (p-- > regexp_h_bol) { + mbtowc(NULL, NULL, 0); + if ((len = mbtowc(&wc, p, mb - p)) >= 0) { + if (len < max || len < mb - p) + break; + max = len; + lastwc = wc; + } else if (len < 0 && max > 0) + break; + } + return lastwc; +} + +#define regexp_h_cclass(set, c, af) \ + ((c) == 0 || (c) == WEOF ? 0 : ( \ + ((c) > 0177) ? \ + regexp_h_cclass_wc(set, c, af) : ( \ + REGEXP_H_IS_THERE((set)+1, (c)) ? 
(af) : !(af) \ + ) \ + ) \ + ) + +static regexp_h_inline int +regexp_h_cclass_wc(const char *set, register wint_t c, int af) +{ + register wint_t wc, wl = WEOF; + const char *end; + + end = &set[18] + set[0] - 1; + set += 17; + while (set < end) { + wc = regexp_h_fetch(set, 0); +#ifdef REGEXP_H_VI_BACKSLASH + if (wc == '\\' && set < end && + (*set == ']' || *set == '-' || + *set == '^' || *set == '\\')) { + wc = regexp_h_fetch(set, 0); + } else +#endif /* REGEXP_H_VI_BACKSLASH */ + if (wc == '-' && wl != WEOF && set < end) { + wc = regexp_h_fetch(set, 0); +#ifdef REGEXP_H_VI_BACKSLASH + if (wc == '\\' && set < end && + (*set == ']' || *set == '-' || + *set == '^' || *set == '\\')) { + wc = regexp_h_fetch(set, 0); + } +#endif /* REGEXP_H_VI_BACKSLASH */ + if (c > wl && c < wc) + return af; + } + if (c == wc) + return af; + wl = wc; + } + return !af; +} +#else /* !REGEXP_H_WCHARS */ +#define regexp_h_wchars 0 +#define regexp_h_getwc(c) { c = GETC(); } +#endif /* !REGEXP_H_WCHARS */ + +regexp_h_static char * +compile(char *instring, char *ep, const char *endbuf, int seof) +{ + INIT /* Dependent declarations and initializations */ + register int c; + register int eof = seof; + char *lastep = instring; + int cclcnt; + char bracket[NBRA], *bracketp; + int closed; + char neg; + int lc; + int i, cflg; + +#ifdef REGEXP_H_WCHARS + char *eq; + regexp_h_mbcurmax = MB_CUR_MAX; + regexp_h_wchars = regexp_h_mbcurmax > 1 ? 
CMB : 0; +#endif + lastep = 0; + bracketp = bracket; + if((c = GETC()) == eof || c == '\n') { + if (c == '\n') { + UNGETC(c); + nodelim = 1; + } + if(*ep == 0 && !sed) + ERROR(41); + if (bracketp > bracket) + ERROR(42); + RETURN(ep); + } + circf = closed = nbra = 0; + if (c == '^') + circf++; + else + UNGETC(c); + for (;;) { + if (ep >= endbuf) + ERROR(50); + regexp_h_getwc(c); + if(c != '*' && ((c != '\\') || (PEEKC() != '{'))) + lastep = ep; + if (c == eof) { + *ep++ = CCEOF; + if (bracketp > bracket) + ERROR(42); + RETURN(ep); + } + switch (c) { + + case '.': + *ep++ = CDOT|regexp_h_wchars; + continue; + + case '\n': + if (sed == 0) { + UNGETC(c); + *ep++ = CCEOF; + nodelim = 1; + RETURN(ep); + } + ERROR(36); + case '*': + if (lastep==0 || *lastep==CBRA || *lastep==CKET || + *lastep==(CBRC|regexp_h_wchars) || + *lastep==(CLET|regexp_h_wchars)) + goto defchar; + *lastep |= STAR; + continue; + + case '$': + if(PEEKC() != eof) + goto defchar; + *ep++ = CDOL; + continue; + + case '[': +#ifdef REGEXP_H_WCHARS + if (regexp_h_wchars == 0) { +#endif + if(&ep[33] >= endbuf) + ERROR(50); + + *ep++ = CCL; + lc = 0; + for(i = 0; i < 32; i++) + ep[i] = 0; + + neg = 0; + if((c = GETC()) == '^') { + neg = 1; + c = GETC(); + } + + do { + c &= 0377; + if(c == '\0' || c == '\n') + ERROR(49); +#ifdef REGEXP_H_VI_BACKSLASH + if(c == '\\' && ((c = PEEKC()) == ']' || + c == '-' || c == '^' || + c == '\\')) { + c = GETC(); + c &= 0377; + } else +#endif /* REGEXP_H_VI_BACKSLASH */ + if(c == '-' && lc != 0) { + if ((c = GETC()) == ']') { + PLACE('-'); + break; + } +#ifdef REGEXP_H_VI_BACKSLASH + if(c == '\\' && + ((c = PEEKC()) == ']' || + c == '-' || + c == '^' || + c == '\\')) + c = GETC(); +#endif /* REGEXP_H_VI_BACKSLASH */ + c &= 0377; + while(lc < c) { + PLACE(lc); + lc++; + } + } + lc = c; + PLACE(c); + } while((c = GETC()) != ']'); + if(neg) { + for(cclcnt = 0; cclcnt < 32; cclcnt++) + ep[cclcnt] ^= 0377; + ep[0] &= 0376; + } + + ep += 32; +#ifdef REGEXP_H_WCHARS + } else { + if 
(&ep[18] >= endbuf) + ERROR(50); + *ep++ = CCL|CMB; + *ep++ = 0; + lc = 0; + for (i = 0; i < 16; i++) + ep[i] = 0; + eq = &ep[16]; + regexp_h_getwc(c); + if (c == L'^') { + regexp_h_getwc(c); + ep[-2] = CNCL|CMB; + } + do { + if (c == '\0' || c == '\n') + ERROR(49); +#ifdef REGEXP_H_VI_BACKSLASH + if(c == '\\' && ((c = PEEKC()) == ']' || + c == '-' || c == '^' || + c == '\\')) { + regexp_h_store(c, eq, endbuf); + regexp_h_getwc(c); + } else +#endif /* REGEXP_H_VI_BACKSLASH */ + if (c == '-' && lc != 0 && lc <= 0177) { + regexp_h_store(c, eq, endbuf); + regexp_h_getwc(c); + if (c == ']') { + PLACE('-'); + break; + } +#ifdef REGEXP_H_VI_BACKSLASH + if(c == '\\' && + ((c = PEEKC()) == ']' || + c == '-' || + c == '^' || + c == '\\')) { + regexp_h_store(c, eq, + endbuf); + regexp_h_getwc(c); + } +#endif /* REGEXP_H_VI_BACKSLASH */ + while (lc < (c & 0177)) { + PLACE(lc); + lc++; + } + } + lc = c; + if (c <= 0177) + PLACE(c); + regexp_h_store(c, eq, endbuf); + regexp_h_getwc(c); + } while (c != L']'); + if ((i = eq - &ep[16]) > 255) + ERROR(50); + lastep[1] = i; + ep = eq; + } +#endif /* REGEXP_H_WCHARS */ + + continue; + + case '\\': + regexp_h_getwc(c); + switch(c) { + + case '(': + if(nbra >= NBRA) + ERROR(43); + *bracketp++ = nbra; + *ep++ = CBRA; + *ep++ = nbra++; + continue; + + case ')': + if(bracketp <= bracket) + ERROR(42); + *ep++ = CKET; + *ep++ = *--bracketp; + closed++; + continue; + + case '<': + *ep++ = CBRC|regexp_h_wchars; + continue; + + case '>': + *ep++ = CLET|regexp_h_wchars; + continue; + + case '{': + if(lastep == (char *) (0)) + goto defchar; + *lastep |= RNGE; + cflg = 0; + nlim: + c = GETC(); + i = 0; + do { + if ('0' <= c && c <= '9') + i = 10 * i + c - '0'; + else + ERROR(16); + } while(((c = GETC()) != '\\') && (c != ',')); + if (i > 255) + ERROR(11); + *ep++ = i; + if (c == ',') { + if(cflg++) + ERROR(44); + if((c = GETC()) == '\\') { + *ep++ = (char)255; + *lastep |= REGEXP_H_LEAST; + } else { + UNGETC(c); + goto nlim; /* get 2'nd number */ 
+ } + } + if(GETC() != '}') + ERROR(45); + if(!cflg) /* one number */ + *ep++ = i; + else if((ep[-1] & 0377) < (ep[-2] & 0377)) + ERROR(46); + continue; + + case '\n': + ERROR(36); + + case 'n': + c = '\n'; + goto defchar; + + default: + if(c >= '1' && c <= '9') { + if((c -= '1') >= closed) + ERROR(25); + *ep++ = CBACK; + *ep++ = c; + continue; + } + } + /* Drop through to default to use \ to turn off special chars */ + + defchar: + default: + lastep = ep; +#ifdef REGEXP_H_WCHARS + if (regexp_h_wchars == 0) { +#endif + *ep++ = CCHR; + *ep++ = c; +#ifdef REGEXP_H_WCHARS + } else { + char mbbuf[MB_LEN_MAX]; + + switch (wctomb(mbbuf, c)) { + case 1: *ep++ = CCH1; + break; + case 2: *ep++ = CCH2; + break; + case 3: *ep++ = CCH3; + break; + default: + *ep++ = CCHR|CMB; + } + regexp_h_store(c, ep, endbuf); + } +#endif /* REGEXP_H_WCHARS */ + } + } +} + +int +step(const char *p1, const char *p2) +{ + register int c; +#ifdef REGEXP_H_WCHARS + register int d; +#endif /* REGEXP_H_WCHARS */ + + REGEXP_H_STEP_INIT /* get circf */ + regexp_h_bol = p1; +#ifdef REGEXP_H_WCHARS + regexp_h_firstwc = NULL; +#endif /* REGEXP_H_WCHARS */ + if (circf) { + loc1 = (char *)p1; + return(regexp_h_advance(p1, p2)); + } + /* fast check for first character */ + if (*p2==CCHR) { + c = p2[1] & 0377; + do { + if ((*p1 & 0377) != c) + continue; + if (regexp_h_advance(p1, p2)) { + loc1 = (char *)p1; + return(1); + } + } while (*p1++); + return(0); + } +#ifdef REGEXP_H_WCHARS + else if (*p2==CCH1) { + do { + if (p1[0] == p2[1] && regexp_h_advance(p1, p2)) { + loc1 = (char *)p1; + return(1); + } + c = regexp_h_fetch(p1, 1); + } while (c); + return(0); + } else if (*p2==CCH2) { + do { + if (p1[0] == p2[1] && p1[1] == p2[2] && + regexp_h_advance(p1, p2)) { + loc1 = (char *)p1; + return(1); + } + c = regexp_h_fetch(p1, 1); + } while (c); + return(0); + } else if (*p2==CCH3) { + do { + if (p1[0] == p2[1] && p1[1] == p2[2] && p1[2] == p2[3]&& + regexp_h_advance(p1, p2)) { + loc1 = (char *)p1; + return(1); 
+ } + c = regexp_h_fetch(p1, 1); + } while (c); + return(0); + } else if ((*p2&0377)==(CCHR|CMB)) { + d = regexp_h_fetch(p2, 0); + do { + c = regexp_h_fetch(p1, 1); + if (c == d && regexp_h_advance(p1, p2)) { + loc1 = (char *)p1; + return(1); + } + } while(c); + return(0); + } + /* regular algorithm */ + if (regexp_h_wchars) + do { + if (regexp_h_advance(p1, p2)) { + loc1 = (char *)p1; + return(1); + } + c = regexp_h_fetch(p1, 1); + } while (c); + else +#endif /* REGEXP_H_WCHARS */ + do { + if (regexp_h_advance(p1, p2)) { + loc1 = (char *)p1; + return(1); + } + } while (*p1++); + return(0); +} + +#ifdef REGEXP_H_WCHARS +/* + * It is painfully slow to read character-wise backwards in a + * multibyte string (see regexp_h_previous() above). For the star + * algorithm, we therefore keep track of every character as it is + * read in forward direction. + * + * Don't use alloca() for stack blocks since there is no measurable + * speedup and huge amounts of memory are used up for long input + * lines. + */ +#ifndef REGEXP_H_STAKBLOK +#define REGEXP_H_STAKBLOK 1000 +#endif + +struct regexp_h_stack { + struct regexp_h_stack *s_nxt; + struct regexp_h_stack *s_prv; + const char *s_ptr[REGEXP_H_STAKBLOK]; +}; + +#define regexp_h_push(sb, sp, sc, lp) (regexp_h_wchars ? 
\ + regexp_h_pushwc(sb, sp, sc, lp) : (void)0) + +static regexp_h_inline void +regexp_h_pushwc(struct regexp_h_stack **sb, + struct regexp_h_stack **sp, + const char ***sc, const char *lp) +{ + if (regexp_h_firstwc == NULL || lp < regexp_h_firstwc) + return; + if (*sb == NULL) { + if ((*sb = regexp_h_malloc(sizeof **sb)) == NULL) + return; + (*sb)->s_nxt = (*sb)->s_prv = NULL; + *sp = *sb; + *sc = &(*sb)->s_ptr[0]; + } else if (*sc >= &(*sp)->s_ptr[REGEXP_H_STAKBLOK]) { + if ((*sp)->s_nxt == NULL) { + struct regexp_h_stack *bq; + + if ((bq = regexp_h_malloc(sizeof *bq)) == NULL) + return; + bq->s_nxt = NULL; + bq->s_prv = *sp; + (*sp)->s_nxt = bq; + *sp = bq; + } else + *sp = (*sp)->s_nxt; + *sc = &(*sp)->s_ptr[0]; + } + *(*sc)++ = lp; +} + +static regexp_h_inline const char * +regexp_h_pop(struct regexp_h_stack **sb, struct regexp_h_stack **sp, + const char ***sc, const char *lp) +{ + if (regexp_h_firstwc == NULL || lp <= regexp_h_firstwc) + return &lp[-1]; + if (*sp == NULL) + return regexp_h_firstwc; + if (*sc == &(*sp)->s_ptr[0]) { + if ((*sp)->s_prv == NULL) { + regexp_h_free(*sp); + *sp = NULL; + *sb = NULL; + return regexp_h_firstwc; + } + *sp = (*sp)->s_prv; + regexp_h_free((*sp)->s_nxt); + (*sp)->s_nxt = NULL ; + *sc = &(*sp)->s_ptr[REGEXP_H_STAKBLOK]; + } + return *(--(*sc)); +} + +static void +regexp_h_zerostak(struct regexp_h_stack **sb, struct regexp_h_stack **sp) +{ + for (*sp = *sb; *sp && (*sp)->s_nxt; *sp = (*sp)->s_nxt) + if ((*sp)->s_prv) + regexp_h_free((*sp)->s_prv); + if (*sp) { + if ((*sp)->s_prv) + regexp_h_free((*sp)->s_prv); + regexp_h_free(*sp); + } + *sp = *sb = NULL; +} +#else /* !REGEXP_H_WCHARS */ +#define regexp_h_push(sb, sp, sc, lp) +#endif /* !REGEXP_H_WCHARS */ + +static int +regexp_h_advance(const char *lp, const char *ep) +{ + register const char *curlp; + int c, least; +#ifdef REGEXP_H_WCHARS + int d; + struct regexp_h_stack *sb = NULL, *sp = NULL; + const char **sc; +#endif /* REGEXP_H_WCHARS */ + char *bbeg; + int ct; + + 
for (;;) switch (least = *ep++ & 0377, least & ~REGEXP_H_LEAST) { + + case CCHR: +#ifdef REGEXP_H_WCHARS + case CCH1: +#endif + if (*ep++ == *lp++) + continue; + return(0); + +#ifdef REGEXP_H_WCHARS + case CCHR|CMB: + if (regexp_h_fetch(ep, 0) == regexp_h_fetch(lp, 1)) + continue; + return(0); + + case CCH2: + if (ep[0] == lp[0] && ep[1] == lp[1]) { + ep += 2, lp += 2; + continue; + } + return(0); + + case CCH3: + if (ep[0] == lp[0] && ep[1] == lp[1] && ep[2] == lp[2]) { + ep += 3, lp += 3; + continue; + } + return(0); +#endif /* REGEXP_H_WCHARS */ + + case CDOT: + if (*lp++) + continue; + return(0); +#ifdef REGEXP_H_WCHARS + case CDOT|CMB: + if ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF) + continue; + return(0); +#endif /* REGEXP_H_WCHARS */ + + case CDOL: + if (*lp==0) + continue; + return(0); + + case CCEOF: + loc2 = (char *)lp; + return(1); + + case CCL: + c = *lp++ & 0377; + if(ISTHERE(c)) { + ep += 32; + continue; + } + return(0); + +#ifdef REGEXP_H_WCHARS + case CCL|CMB: + case CNCL|CMB: + c = regexp_h_fetch(lp, 1); + if (regexp_h_cclass(ep, c, (ep[-1] & 0377) == (CCL|CMB))) { + ep += (*ep & 0377) + 17; + continue; + } + return 0; +#endif /* REGEXP_H_WCHARS */ + + case CBRA: + braslist[*ep++ & 0377] = (char *)lp; + continue; + + case CKET: + braelist[*ep++ & 0377] = (char *)lp; + continue; + + case CBRC: + if (lp == regexp_h_bol && locs == NULL) + continue; + if ((isdigit(lp[0] & 0377) || regexp_h_uletter(lp[0] & 0377)) + && !regexp_h_uletter(lp[-1] & 0377) + && !isdigit(lp[-1] & 0377)) + continue; + return(0); + +#ifdef REGEXP_H_WCHARS + case CBRC|CMB: + c = regexp_h_show(lp); + d = regexp_h_previous(lp); + if ((iswdigit(c) || regexp_h_wuletter(c)) + && !regexp_h_wuletter(d) + && !iswdigit(d)) + continue; + return(0); +#endif /* REGEXP_H_WCHARS */ + + case CLET: + if (!regexp_h_uletter(lp[0] & 0377) && !isdigit(lp[0] & 0377)) + continue; + return(0); + +#ifdef REGEXP_H_WCHARS + case CLET|CMB: + c = regexp_h_show(lp); + if (!regexp_h_wuletter(c) && 
!iswdigit(c)) + continue; + return(0); +#endif /* REGEXP_H_WCHARS */ + + case CCHR|RNGE: + c = *ep++; + regexp_h_getrnge(ep, least); + while(low--) + if(*lp++ != c) + return(0); + curlp = lp; + while(size--) { + regexp_h_push(&sb, &sp, &sc, lp); + if(*lp++ != c) + break; + } + if(size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + lp++; + } + ep += 2; + goto star; + +#ifdef REGEXP_H_WCHARS + case CCHR|RNGE|CMB: + case CCH1|RNGE: + case CCH2|RNGE: + case CCH3|RNGE: + c = regexp_h_fetch(ep, 0); + regexp_h_getrnge(ep, least); + while (low--) + if (regexp_h_fetch(lp, 1) != c) + return 0; + curlp = lp; + while (size--) { + regexp_h_push(&sb, &sp, &sc, lp); + if (regexp_h_fetch(lp, 1) != c) + break; + } + if(size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + regexp_h_fetch(lp, 1); + } + ep += 2; + goto star; +#endif /* REGEXP_H_WCHARS */ + + case CDOT|RNGE: + regexp_h_getrnge(ep, least); + while(low--) + if(*lp++ == '\0') + return(0); + curlp = lp; + while(size--) { + regexp_h_push(&sb, &sp, &sc, lp); + if(*lp++ == '\0') + break; + } + if(size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + lp++; + } + ep += 2; + goto star; + +#ifdef REGEXP_H_WCHARS + case CDOT|RNGE|CMB: + regexp_h_getrnge(ep, least); + while (low--) + if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF) + return 0; + curlp = lp; + while (size--) { + regexp_h_push(&sb, &sp, &sc, lp); + if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF) + break; + } + if (size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + regexp_h_fetch(lp, 1); + } + ep += 2; + goto star; +#endif /* REGEXP_H_WCHARS */ + + case CCL|RNGE: + regexp_h_getrnge(ep + 32, least); + while(low--) { + c = *lp++ & 0377; + if(!ISTHERE(c)) + return(0); + } + curlp = lp; + while(size--) { + regexp_h_push(&sb, &sp, &sc, lp); + c = *lp++ & 0377; + if(!ISTHERE(c)) + break; + } + if(size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + lp++; + } + ep += 34; /* 32 + 2 */ + goto star; + +#ifdef REGEXP_H_WCHARS + case CCL|RNGE|CMB: + case CNCL|RNGE|CMB: + 
regexp_h_getrnge(ep + (*ep & 0377) + 17, least); + while (low--) { + c = regexp_h_fetch(lp, 1); + if (!regexp_h_cclass(ep, c, + (ep[-1] & 0377 & ~REGEXP_H_LEAST) + == (CCL|RNGE|CMB))) + return 0; + } + curlp = lp; + while (size--) { + regexp_h_push(&sb, &sp, &sc, lp); + c = regexp_h_fetch(lp, 1); + if (!regexp_h_cclass(ep, c, + (ep[-1] & 0377 & ~REGEXP_H_LEAST) + == (CCL|RNGE|CMB))) + break; + } + if (size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + regexp_h_fetch(lp, 1); + } + ep += (*ep & 0377) + 19; + goto star; +#endif /* REGEXP_H_WCHARS */ + + case CBACK: + bbeg = braslist[*ep & 0377]; + ct = braelist[*ep++ & 0377] - bbeg; + + if(strncmp(bbeg, lp, ct) == 0) { + lp += ct; + continue; + } + return(0); + + case CBACK|STAR: + bbeg = braslist[*ep & 0377]; + ct = braelist[*ep++ & 0377] - bbeg; + curlp = lp; + while(strncmp(bbeg, lp, ct) == 0) + lp += ct; + + while(lp >= curlp) { + if(regexp_h_advance(lp, ep)) return(1); + lp -= ct; + } + return(0); + + + case CDOT|STAR: + curlp = lp; + do + regexp_h_push(&sb, &sp, &sc, lp); + while (*lp++); + goto star; + +#ifdef REGEXP_H_WCHARS + case CDOT|STAR|CMB: + curlp = lp; + do + regexp_h_push(&sb, &sp, &sc, lp); + while ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF); + goto star; +#endif /* REGEXP_H_WCHARS */ + + case CCHR|STAR: + curlp = lp; + do + regexp_h_push(&sb, &sp, &sc, lp); + while (*lp++ == *ep); + ep++; + goto star; + +#ifdef REGEXP_H_WCHARS + case CCHR|STAR|CMB: + case CCH1|STAR: + case CCH2|STAR: + case CCH3|STAR: + curlp = lp; + d = regexp_h_fetch(ep, 0); + do + regexp_h_push(&sb, &sp, &sc, lp); + while (regexp_h_fetch(lp, 1) == d); + goto star; +#endif /* REGEXP_H_WCHARS */ + + case CCL|STAR: + curlp = lp; + do { + regexp_h_push(&sb, &sp, &sc, lp); + c = *lp++ & 0377; + } while(ISTHERE(c)); + ep += 32; + goto star; + +#ifdef REGEXP_H_WCHARS + case CCL|STAR|CMB: + case CNCL|STAR|CMB: + curlp = lp; + do { + regexp_h_push(&sb, &sp, &sc, lp); + c = regexp_h_fetch(lp, 1); + } while (regexp_h_cclass(ep, c, 
(ep[-1] & 0377) + == (CCL|STAR|CMB))); + ep += (*ep & 0377) + 17; + goto star; +#endif /* REGEXP_H_WCHARS */ + + star: +#ifdef REGEXP_H_WCHARS + if (regexp_h_wchars == 0) { +#endif + do { + if(--lp == locs) + break; + if (regexp_h_advance(lp, ep)) + return(1); + } while (lp > curlp); +#ifdef REGEXP_H_WCHARS + } else { + do { + lp = regexp_h_pop(&sb, &sp, &sc, lp); + if (lp <= locs) + break; + if (regexp_h_advance(lp, ep)) { + regexp_h_zerostak(&sb, &sp); + return(1); + } + } while (lp > curlp); + regexp_h_zerostak(&sb, &sp); + } +#endif /* REGEXP_H_WCHARS */ + return(0); + + } +} + +static void +regexp_h_getrnge(register const char *str, int least) +{ + low = *str++ & 0377; + size = least & REGEXP_H_LEAST ? /*20000*/INT_MAX : (*str & 0377) - low; +} + +int +advance(const char *lp, const char *ep) +{ + REGEXP_H_ADVANCE_INIT /* skip past circf */ + regexp_h_bol = lp; +#ifdef REGEXP_H_WCHARS + regexp_h_firstwc = NULL; +#endif /* REGEXP_H_WCHARS */ + return regexp_h_advance(lp, ep); +} diff --git a/libcommon/regexpr.c b/libcommon/regexpr.c @@ -0,0 +1,90 @@ +/* + * Simple Regular Expression functions. Derived from Unix 7th Edition, + * /usr/src/cmd/expr.y + * + * Modified by Gunnar Ritter, Freiburg i. Br., Germany, January 2003. + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Sccsid @(#)regexpr.c 1.8 (gritter) 10/13/04 */ + +#include <stdlib.h> +#include "regexpr.h" + +int regerrno, reglength; +static int circf; + +static char *regexpr_compile(char *, char *, const char *, int); + +char * +compile(const char *instring, char *ep, char *endbuf) +{ + char *cp; + int sz = 0; + + if (ep == 0) { + for (cp = (char *)instring; *cp != '\0'; cp++) + if (*cp == '[') + sz += 32; + sz += 2 * (cp - instring) + 5; + if ((ep = malloc(sz)) == 0) { + regerrno = 11; + return 0; + } + endbuf = &ep[sz]; + ep[1] = '\0'; + } + if ((cp=regexpr_compile((char *)instring, &ep[1], endbuf, '\0')) == 0) { + if (sz) + free(ep); + return 0; + } + ep[0] = circf; + reglength = cp - ep; + return sz ? 
ep : cp; +} + +#define INIT register char *sp = instring; +#define GETC() (*sp++) +#define PEEKC() (*sp) +#define UNGETC(c) (--sp) +#define RETURN(c) return (c); +#define ERROR(c) { regerrno = c; return 0; } + +#define compile(a, b, c, d) regexpr_compile(a, b, c, d) +#define regexp_h_static static +#define REGEXP_H_STEP_INIT circf = *p2++; +#define REGEXP_H_ADVANCE_INIT circf = *ep++; + +#include "regexp.h" diff --git a/libcommon/regexpr.h b/libcommon/regexpr.h @@ -0,0 +1,53 @@ +/* + * Simple Regular Expression functions. Derived from Unix 7th Edition, + * /usr/src/cmd/expr.y + * + * Modified by Gunnar Ritter, Freiburg i. Br., Germany, January 2003. + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Sccsid @(#)regexpr.h 1.2 (gritter) 1/11/03 */ + +#define NBRA 9 + +extern char *braslist[NBRA]; +extern char *braelist[NBRA]; +extern int nbra; +extern int regerrno, reglength; +extern char *loc1, *loc2, *locs; +extern int sed; + +extern char *compile(const char *, char *, char *); +extern int step(const char *, const char *); +extern int advance(const char *, const char *); diff --git a/libcommon/setlabel.c b/libcommon/setlabel.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
/*	Sccsid @(#)setlabel.c	1.1 (gritter) 9/21/03	*/

extern char	*pfmt_label__;

/*
 * Set the label stored in pfmt_label__.
 *
 * A non-empty s is copied into a static buffer, truncated to 25
 * characters, and pfmt_label__ is pointed at that buffer. A null or
 * empty s clears pfmt_label__. Always returns 0.
 */
int
setlabel(const char *s)
{
	static char	lbuf[26];
	int	n;

	if (s == 0 || s[0] == '\0') {
		pfmt_label__ = 0;
		return 0;
	}
	for (n = 0; s[n] != '\0' && n < (int)sizeof lbuf - 1; n++)
		lbuf[n] = s[n];
	lbuf[n] = '\0';
	pfmt_label__ = lbuf;
	return 0;
}
+ */ +/* Sccsid @(#)setuxlabel.c 1.1 (gritter) 9/21/03 */ + +#include "msgselect.h" + +extern char *pfmt_label__; + +int +setuxlabel(const char *s) +{ + static char lbuf[msgselect(29,26)]; + char *lp, *mp; + + if (s && s[0]) { + lp = lbuf; + mp = msgselect("UX:",""); + while (*mp) + *lp++ = *mp++; + lbuf[0] = 'U', lbuf[1] = 'X', lbuf[2] = ':'; + while (*s && lp < &lbuf[sizeof lbuf-1]) + *lp++ = *s++; + *lp = '\0'; + pfmt_label__ = lbuf; + } else + pfmt_label__ = 0; + return 0; +} diff --git a/libcommon/sfile.c b/libcommon/sfile.c @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ +/* Sccsid @(#)sfile.c 1.9 (gritter) 6/7/04 */ + +#ifdef __linux__ +#undef _FILE_OFFSET_BITS + +#include <sys/types.h> +#include <sys/sendfile.h> +#include <sys/stat.h> +#include <unistd.h> +#include <limits.h> +#include <errno.h> +#include "sfile.h" + +long long +sfile(int dfd, int sfd, mode_t mode, long long count) +{ + static int enosys, einval, success; + off_t offset; + ssize_t sent, total; + extern void writerr(void *, int, int); + /* + * A process is not interruptible while executing a sendfile() + * system call. So it is not advisable to to send an entire + * file with one call; it is sent in parts so signals can + * be delivered in between. + */ + const ssize_t chunk = 196608; + + /* + * If a previous call returned ENOSYS, the operating system does + * not support sendfile() at all and it makes no sense to try it + * again. + * + * If a previous call returned EINVAL and there was no successful + * call yet, it is very likely that this is a permanent error + * condition (on Linux 2.6.0-test4, sendfile() may be used for + * socket targets only; older versions don't support tmpfs as + * source file system etc.). + */ + if (enosys || !success && einval || + (mode&S_IFMT) != S_IFREG || count > SSIZE_MAX) + return 0; + offset = lseek(sfd, 0, SEEK_CUR); + sent = 0, total = 0; + while (count > 0 && (sent = sendfile(dfd, sfd, &offset, + count > chunk ? chunk : count)) > 0) { + count -= sent, total += sent; + } + if (total && lseek(sfd, offset, SEEK_SET) == (off_t)-1) + return -1; + if (count == 0 || sent == 0) { + success = 1; + return total; + } + switch (errno) { + case ENOSYS: + enosys = 1; + return 0; + case EINVAL: + einval = 1; + return 0; + case ENOMEM: + return 0; + default: + writerr(NULL, count > chunk ? 
chunk : count, 0); + return -1; + } +} +#else /* !__linux__ */ +#include <sys/types.h> + +/*ARGSUSED*/ +long long +sfile(int dfd, int sfd, mode_t mode, long long count) +{ + return 0; +} +#endif /* __linux__ */ diff --git a/libcommon/sfile.h b/libcommon/sfile.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)sfile.h 1.4 (gritter) 4/17/03 */ + +/* + * Return values: + * + * src_size The entire range has been copied. The file offset of both + * dst_fd and src_fd have been set to this position. The + * operation has been completed successfully. + * + * >0 Number of bytes written. The file offset of both dst_fd + * and src_fd have been set to this position. The operation + * may continue using read()/write(). + * + * 0 No data was written; operation may continue. + * + * -1 An error occured; operation may not continue. 
/*
 * sfile() returns src_size when the entire range has been copied, a
 * positive count when only that many bytes were written, 0 when no
 * data was written, and -1 when an error occurred and the operation
 * may not continue.
 */
extern long long	sfile(int dst_fd, int src_fd, mode_t src_mode,
				long long src_size);

/*	Sccsid @(#)sighold.c	1.7 (gritter) 1/22/06	*/

#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \
	defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__)
#include <signal.h>
#include "sigset.h"

/*
 * Add sig to the signal mask of the caller.
 *
 * Returns -1 if sig is not positive, otherwise the result of
 * sigprocmask().
 */
int
sighold(int sig)
{
	sigset_t	block, previous;

	if (sig > 0) {
		sigemptyset(&block);
		sigaddset(&block, sig);
		return sigprocmask(SIG_BLOCK, &block, &previous);
	}
	return -1;
}
#endif	/* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __OpenBSD__ ||
	__DragonFly__ || __APPLE__ */
/*	Sccsid @(#)sigignore.c	1.6 (gritter) 1/22/06	*/

#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \
	defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__)
#include <signal.h>
#include "sigset.h"

/*
 * Set the disposition of sig to SIG_IGN.
 *
 * For SIGCHLD the flags SA_NOCLDSTOP and SA_NOCLDWAIT are requested as
 * well. Returns -1 if sig is not positive, otherwise the result of
 * sigaction().
 */
int
sigignore(int sig)
{
	struct sigaction	ignore;

	if (sig <= 0)
		return -1;
	sigemptyset(&ignore.sa_mask);
	sigaddset(&ignore.sa_mask, sig);
	ignore.sa_flags = sig == SIGCHLD ? SA_NOCLDSTOP|SA_NOCLDWAIT : 0;
	ignore.sa_handler = SIG_IGN;
	return sigaction(sig, &ignore, (struct sigaction *)0);
}
#endif	/* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __OpenBSD__ ||
	__DragonFly__ || __APPLE__ */
/*	Sccsid @(#)signal.c	1.6 (gritter) 1/22/06	*/

#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \
	defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__)
#include <signal.h>
#include "sigset.h"

/*
 * signal() built on sigaction().
 *
 * The handler is installed with SA_RESETHAND|SA_NODEFER and an empty
 * sa_mask. Ignoring SIGCHLD additionally requests SA_NOCLDSTOP and
 * SA_NOCLDWAIT.
 *
 * Returns the previous handler, or SIG_ERR if sig is not positive or
 * sigaction() fails.
 */
void (*signal(int sig, void (*func)(int)))(int)
{
	struct sigaction	wanted, previous;
	int	flags = SA_RESETHAND|SA_NODEFER;

	if (sig <= 0)
		return SIG_ERR;
	if (sig == SIGCHLD && func == SIG_IGN)
		flags |= SA_NOCLDSTOP|SA_NOCLDWAIT;
	sigemptyset(&wanted.sa_mask);
	wanted.sa_flags = flags;
	wanted.sa_handler = func;
	return sigaction(sig, &wanted, &previous) == -1 ?
		SIG_ERR : previous.sa_handler;
}
#endif	/* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __OpenBSD__ ||
	__DragonFly__ || __APPLE__ */
/*	Sccsid @(#)sigpause.c	1.6 (gritter) 1/22/06	*/

#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \
	defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__)
#include <signal.h>
#include "sigset.h"

/*
 * Remove sig from the signal mask of the caller and suspend until a
 * signal is delivered.
 *
 * The wait uses the current mask with only sig removed, so signals the
 * caller has blocked otherwise stay blocked while waiting. sig is
 * unblocked and the wait begun in the single sigsuspend() call, which
 * also puts the original mask back when it returns.
 *
 * Returns -1 if sig is not positive or the mask cannot be obtained,
 * otherwise the result of sigsuspend().
 */
int
sigpause(int sig)
{
	sigset_t	mask;

	if (sig <= 0)
		return -1;
	/* A null set pointer only queries the current mask. */
	if (sigprocmask(SIG_BLOCK, (sigset_t *)0, &mask) < 0)
		return -1;
	if (sigdelset(&mask, sig) < 0)
		return -1;
	return sigsuspend(&mask);
}
#endif	/* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __OpenBSD__ ||
	__DragonFly__ || __APPLE__ */
/*	Sccsid @(#)sigrelse.c	1.8 (gritter) 1/22/06	*/

#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \
	defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__)
#include <signal.h>
#include "sigset.h"

/*
 * Remove sig from the signal mask of the caller.
 *
 * Returns -1 if sig is not positive, otherwise the result of
 * sigprocmask().
 */
int
sigrelse(int sig)
{
	sigset_t	release, previous;

	if (sig > 0) {
		sigemptyset(&release);
		sigaddset(&release, sig);
		return sigprocmask(SIG_UNBLOCK, &release, &previous);
	}
	return -1;
}
#endif	/* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __OpenBSD__ ||
	__DragonFly__ || __APPLE__ */
/*	Sccsid @(#)sigset.c	1.7 (gritter) 1/22/06	*/

#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \
	defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__)
#include <signal.h>
#include "sigset.h"

/*
 * sigset() built on sigprocmask() and sigaction().
 *
 * With func == SIG_HOLD, sig is added to the signal mask and its
 * disposition is only queried. With any other func, sig is removed
 * from the mask and func is installed with sig in its sa_mask.
 * Ignoring SIGCHLD additionally requests SA_NOCLDSTOP and
 * SA_NOCLDWAIT.
 *
 * Returns SIG_HOLD if sig was in the mask before the call, otherwise
 * the previous handler. Returns SIG_ERR if sig is not positive or one
 * of the system calls fails.
 */
void (*sigset(int sig, void (*func)(int)))(int)
{
	struct sigaction	wanted, previous;
	sigset_t	one, oldmask;
	const int	hold = func == SIG_HOLD;

	if (sig <= 0)
		return SIG_ERR;

	sigemptyset(&one);
	sigaddset(&one, sig);
	if (sigprocmask(hold ? SIG_BLOCK : SIG_UNBLOCK, &one, &oldmask) < 0)
		return SIG_ERR;

	if (hold) {
		/* Disposition stays as it is; just fetch it. */
		if (sigaction(sig, (struct sigaction *)0, &previous) == -1)
			return SIG_ERR;
	} else {
		wanted.sa_handler = func;
		wanted.sa_flags = sig == SIGCHLD && func == SIG_IGN ?
			SA_NOCLDSTOP|SA_NOCLDWAIT : 0;
		sigemptyset(&wanted.sa_mask);
		sigaddset(&wanted.sa_mask, sig);
		if (sigaction(sig, &wanted, &previous) == -1)
			return SIG_ERR;
	}
	return sigismember(&oldmask, sig) ? SIG_HOLD : previous.sa_handler;
}
#endif	/* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __OpenBSD__ ||
	__DragonFly__ || __APPLE__ */
/*	Sccsid @(#)sigset.h	1.9 (gritter) 1/22/06	*/

#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \
	defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__)

#ifndef	SIG_HOLD
#define	SIG_HOLD	((void (*)(int))2)
#endif	/* !SIG_HOLD */

extern int	sighold(int);
extern int	sigignore(int);
extern int	sigpause(int);
extern int	sigrelse(int);
extern void	(*sigset(int, void (*)(int)))(int);
extern void	(*signal(int, void (*)(int)))(int);
#endif	/* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __OpenBSD__ ||
	__DragonFly__ || __APPLE__ */

/*	Sccsid @(#)strtol.c	1.6 (gritter) 7/18/04	*/

#if defined (__hpux) || defined (_AIX) || \
	defined (__FreeBSD__) && (__FreeBSD__) < 5

#include	<stdlib.h>
#include	<ctype.h>
#include	<errno.h>

#include	"atoll.h"

#ifdef	__hpux
#ifndef	_INCLUDE__STDC_A1_SOURCE
#error	You must use cc -D_INCLUDE__STDC_A1_SOURCE on HP-UX
#endif
#endif	/* __hpux */

/*
 * Common conversion routine behind strtoll(), strtoull() and atoll().
 *
 * nptr is the string to convert. If endptr is not null, *endptr is set
 * to the first character that was not consumed, or to nptr if no digits
 * were found. base is 2..36, or 0 to derive it from the prefix ("0x" or
 * "0X" gives 16, a leading "0" gives 8, a leading 1-9 gives 10).
 *
 * flags bit 1 enables errno reporting (EINVAL, ERANGE) and overflow
 * detection; flags bit 2 selects unsigned overflow semantics.
 */
static long long
internal(const char *nptr, char **endptr, int base, int flags)
{
	const char	*pp = nptr, *bptr;
	long long	v = 0, ov;
	int	sign = 1;
	int	c;
	int	valid = 1;

	/* XXX
	 * iswspace() should be used.
	 */
	for (bptr = nptr; isspace(*bptr&0377); bptr++);
	if (*bptr == '-') {
		sign = -1;
		bptr++;
	} else if (*bptr == '+')
		bptr++;
	if (base == 0) {
		if (*bptr >= '1' && *bptr <= '9')
			base = 10;
		else if (*bptr == '0') {
			if (bptr[1] == 'x' || bptr[1] == 'X')
				base = 16;
			else
				base = 8;
		} else {
			if (flags&1)
				errno = EINVAL;
			goto out;
		}
	}
	if (base < 2 || base > 36) {
		if (flags&1)
			errno = EINVAL;
		goto out;
	}
	if (base == 16 && bptr[0] == '0' &&
			(bptr[1] == 'x' || bptr[1] == 'X'))
		bptr += 2;
	pp = bptr;
	for (;;) {
		if (*pp >= '0' && *pp <= '9')
			c = *pp - '0';
		else if (*pp >= 'a' && *pp <= 'z')
			c = *pp - 'a' + 10;
		else if (*pp >= 'A' && *pp <= 'Z')
			/* upper-case digits cover the whole range A-Z */
			c = *pp - 'A' + 10;
		else
			break;
		if (c >= base)
			break;
		pp++;
		/* After an overflow the digits are still consumed. */
		if (valid) {
			ov = v;
			v = v * base + c;
			if (flags&1) {
				if (flags&2 && (unsigned long long)v <
						(unsigned long long)ov ||
						v < ov) {
					sign = 1;
					errno = ERANGE;
					v = -1;
					if ((flags&2)==0)
						v = (unsigned long long)v >> 1;
					valid = 0;
				}
			}
		}
	}
out:	if (pp <= bptr) {
		/* No digit was consumed. */
		if (flags&1)
			errno = EINVAL;
		if (endptr)
			*endptr = (char *)nptr;
	} else {
		if (endptr)
			*endptr = (char *)pp;
	}
	return v * sign;
}

/* Signed conversion with errno reporting. */
long long
strtoll(const char *nptr, char **endptr, int base)
{
	return internal(nptr, endptr, base, 1);
}

/* Unsigned conversion with errno reporting. */
unsigned long long
strtoull(const char *nptr, char **endptr, int base)
{
	return (unsigned long long)internal(nptr, endptr, base, 3);
}

/* Decimal conversion; errno is left untouched. */
long long
atoll(const char *nptr)
{
	return internal(nptr, NULL, 10, 0);
}
#endif	/* __hpux || _AIX || __FreeBSD__ < 5 */

/*	Sccsid @(#)sysv3.c	1.1 (gritter) 5/29/04	*/
int	sysv3;
/*	Sccsid @(#)utmpx.c	1.13 (gritter) 12/16/07	*/

#include <stdio.h>

#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \
	defined (__UCLIBC__) || defined (__OpenBSD__) || \
	defined (__DragonFly__) || \
	defined (__APPLE__) && \
		(__MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_OS_X_VERSION_10_5)
#include <sys/types.h>
#include <sys/time.h>
#include <utmp.h>
#include <stdlib.h>
#include <string.h>

#include "utmpx.h"

static FILE	*utfp;			/* open utmp file, or NULL */
static struct utmpx	utx;		/* result buffer handed to callers */
static const char	*utmpfile = _PATH_UTMP;
static char	*utmpfile_copy;		/* heap copy made by utmpxname() */

/*
 * Open the utmp file if it is not open yet, read-write if possible and
 * read-only otherwise. Returns the stream, or NULL if it cannot be
 * opened.
 */
static FILE *
init(void)
{
	if (utfp == NULL && (utfp = fopen(utmpfile, "r+")) == NULL)
		if ((utfp = fopen(utmpfile, "r")) == NULL)
			return NULL;
	return utfp;
}

/*
 * Convert a struct utmp record to struct utmpx. The record type is
 * derived from the line name: "~" is BOOT_TIME, "|" is OLD_TIME and
 * "{" is NEW_TIME (the same names utmpx2utmp() writes); an empty user
 * name is DEAD_PROCESS and anything else is USER_PROCESS.
 */
static struct utmpx *
utmp2utmpx(struct utmpx *ux, const struct utmp *up)
{
#ifndef	__dietlibc__
	memset(ux, 0, sizeof *ux);
	ux->ut_tv.tv_sec = up->ut_time;
	memcpy(ux->ut_line, up->ut_line, UT_LINESIZE);
	memcpy(ux->ut_user, up->ut_name, UT_NAMESIZE);
	memcpy(ux->ut_host, up->ut_host, UT_HOSTSIZE);
	if (strcmp(up->ut_line, "~") == 0)
		ux->ut_type = BOOT_TIME;
	else if (strcmp(up->ut_line, "|") == 0)
		ux->ut_type = OLD_TIME;
	else if (strcmp(up->ut_line, "{") == 0)
		ux->ut_type = NEW_TIME;
	else if (*up->ut_name == 0)
		ux->ut_type = DEAD_PROCESS;
	else
		ux->ut_type = USER_PROCESS;
#else	/* __dietlibc__ */
	*ux = *up;
#endif	/* __dietlibc__ */
	return ux;
}

/*
 * Convert a struct utmpx record to struct utmp. Returns NULL for record
 * types that have no struct utmp representation.
 */
static struct utmp *
utmpx2utmp(struct utmp *up, const struct utmpx *ux)
{
#ifndef	__dietlibc__
	memset(up, 0, sizeof *up);
	up->ut_time = ux->ut_tv.tv_sec;
	switch (ux->ut_type) {
	case DEAD_PROCESS:
		memcpy(up->ut_line, ux->ut_line, UT_LINESIZE);
		break;
	default:
	case EMPTY:
	case INIT_PROCESS:
	case LOGIN_PROCESS:
	case RUN_LVL:
	case ACCOUNTING:
		return NULL;
	case BOOT_TIME:
		strcpy(up->ut_name, "reboot");
		strcpy(up->ut_line, "~");
		break;
	case OLD_TIME:
		strcpy(up->ut_name, "date");
		strcpy(up->ut_line, "|");
		break;
	case NEW_TIME:
		strcpy(up->ut_name, "date");
		strcpy(up->ut_line, "{");
		break;
	case USER_PROCESS:
		memcpy(up->ut_line, ux->ut_line, UT_LINESIZE);
		memcpy(up->ut_name, ux->ut_user, UT_NAMESIZE);
		memcpy(up->ut_host, ux->ut_host, UT_HOSTSIZE);
	}
#else	/* __dietlibc__ */
	*up = *ux;
#endif	/* __dietlibc__ */
	return up;
}

/*
 * Return the next record that is not all zero bytes, or NULL at end of
 * file or if the file cannot be opened. The result is kept in a static
 * buffer.
 */
struct utmpx *
getutxent(void)
{
	static struct utmp	zero;
	struct utmp	ut;

	if (init() == NULL)
		return NULL;
	do {
		if (fread(&ut, sizeof ut, 1, utfp) != 1)
			return NULL;
	} while (memcmp(&ut, &zero, sizeof ut) == 0);
	return utmp2utmpx(&utx, &ut);
}

/*
 * Search the file from its beginning for a LOGIN_PROCESS or
 * USER_PROCESS record whose line name equals ux->ut_line. Returns the
 * record in a static buffer, or NULL if there is none.
 */
struct utmpx *
getutxline(const struct utmpx *ux)
{
	struct utmp	ut;

	if (init() == NULL)
		return NULL;
	fseek(utfp, 0, SEEK_SET);
	while (fread(&ut, sizeof ut, 1, utfp) == 1) {
		utmp2utmpx(&utx, &ut);
		/* Line names need not be terminated, so bound the compare. */
		if ((utx.ut_type == LOGIN_PROCESS ||
					utx.ut_type == USER_PROCESS) &&
				strncmp(ut.ut_line, ux->ut_line,
					UT_LINESIZE) == 0)
			return &utx;
	}
	return NULL;
}

/*
 * Search for a record matching ux by type and, for process records, by
 * id. Only implemented for dietlibc; otherwise always returns NULL.
 */
struct utmpx *
getutxid(const struct utmpx *ux)
{
#ifdef	__dietlibc__
	struct utmp	ut;
#endif

	if (init() == NULL)
		return NULL;
#ifdef	__dietlibc__
	fseek(utfp, 0, SEEK_SET);
	while (fread(&ut, sizeof ut, 1, utfp) == 1) {
		utmp2utmpx(&utx, &ut);
		switch (ux->ut_type) {
		case BOOT_TIME:
		case OLD_TIME:
		case NEW_TIME:
			if (ux->ut_type == utx.ut_type)
				return &utx;
			break;
		case INIT_PROCESS:
		case LOGIN_PROCESS:
		case USER_PROCESS:
		case DEAD_PROCESS:
			/*
			 * NOTE(review): if ut_id is a character array,
			 * this == compares addresses and never matches;
			 * confirm the member type in dietlibc's headers.
			 */
			if (ux->ut_type == utx.ut_type &&
					ux->ut_id == utx.ut_id)
				return &utx;
			break;
		}
	}
#endif	/* __dietlibc__ */
	return NULL;
}

/* Rewind the utmp file, opening it if necessary. */
void
setutxent(void)
{
	if (init() == NULL)
		return;
	fseek(utfp, 0, SEEK_SET);
}

/* Close the utmp file if it is open. */
void
endutxent(void)
{
	FILE	*fp = utfp;

	if (fp == NULL)
		return;
	utfp = NULL;
	fclose(fp);
}

/*
 * Select another utmp file. It is used the next time the file is
 * opened; a file that is already open stays in use until endutxent().
 * Returns 0 on success and -1 if name is null or cannot be copied, in
 * which case the previous name remains in effect.
 */
int
utmpxname(const char *name)
{
	char	*copy;

	if (name == NULL || (copy = strdup(name)) == NULL)
		return -1;
	free(utmpfile_copy);
	utmpfile = utmpfile_copy = copy;
	return 0;
}

/*
 * Write the record *up to the utmp file. A record with the same line
 * name found at or after the current file position is overwritten;
 * otherwise the new record is written where the search ended. Returns
 * a copy of *up in a static buffer, or NULL if the record type cannot
 * be stored or the write fails.
 */
extern struct utmpx *
pututxline(const struct utmpx *up)
{
	struct utmp	ut;
	struct utmpx	*rp;

	if (init() == NULL)
		return NULL;
	/*
	 * Cannot use getutxid() because there is no id field. Use
	 * the equivalent of getutxline() instead.
	 */
	while (fread(&ut, sizeof ut, 1, utfp) == 1) {
		if (strncmp(ut.ut_line, up->ut_line, UT_LINESIZE) == 0) {
			/* sizeof is unsigned; convert before negating */
			fseek(utfp, -(long)sizeof ut, SEEK_CUR);
			break;
		}
	}
	fflush(utfp);
	if (utmpx2utmp(&ut, up) == NULL)
		rp = NULL;
	else if (fwrite(&ut, sizeof ut, 1, utfp) == 1) {
		utx = *up;
		rp = &utx;
	} else
		rp = NULL;
	fflush(utfp);
	return rp;
}

/*
 * Append *up to the file name.
 * NOTE(review): the record is written as a struct utmpx without
 * conversion, unlike pututxline(); confirm that readers of this file
 * expect that layout.
 */
extern void
updwtmpx(const char *name, const struct utmpx *up)
{
	FILE	*fp;

	if ((fp = fopen(name, "a")) == NULL)
		return;
	fwrite(up, sizeof *up, 1, fp);
	fclose(fp);
}

#endif	/* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __UCLIBC__ ||
	__OpenBSD__ || __DragonFly__ || __APPLE__ */
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)vpfmt.c 1.2 (gritter) 9/21/03 */ + +#include <stdio.h> +#include <stdarg.h> + +#include "pfmt.h" + +extern char *pfmt_label__; + +/* + * Strip catalog and msgnum from s, but only if they actually appear. + */ +static const char * +begin(const char *s, long flags) +{ + const char *sp; + + if (flags & MM_NOGET) + return s; + sp = s; + if (*sp && *sp != ':') { + sp++; + while (*sp && *sp != '/' && *sp != ':' && sp - s < 14) + sp++; + } + if (*sp++ != ':') + return s; + while (*sp >= '0' && *sp <= '9') + sp++; + if (*sp++ != ':' || *sp == '\0') + return s; + return sp; +} + +int +vpfmt(FILE *stream, long flags, const char *fmt, va_list ap) +{ + int n = 0; + const char *severity = NULL; + char sevbuf[25]; + + if ((flags&MM_NOSTD) == 0) { + if (flags & MM_ACTION) + severity = "TO FIX"; + else switch (flags & 0377) { + case MM_HALT: + severity = "HALT"; + break; + case MM_WARNING: + severity = "WARNING"; + break; + case MM_INFO: + severity = "INFO"; + break; + case MM_ERROR: + severity = "ERROR"; + break; + default: + snprintf(sevbuf, sizeof sevbuf, "SEV=%ld", flags&0377); + severity = sevbuf; + } + if (pfmt_label__) + n = fprintf(stream, "%s: ", pfmt_label__); + if (severity) + n += fprintf(stream, "%s: ", severity); + } + n += vfprintf(stream, begin(fmt, flags), ap); + return n; 
+} diff --git a/libuxre/COPYING.LGPL b/libuxre/COPYING.LGPL @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. 
+ + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. 
+ + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. 
+ + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. 
+ + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. 
+ + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. 
+Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. 
Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. 
However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. 
However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. 
If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. 
+ + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/libuxre/NOTES b/libuxre/NOTES @@ -0,0 +1,14 @@ +Notes for the modified 'UNIX(R) Regular Expression Library' +============================================================ + +The code this is based on was released by Caldera as 'osutils-0.1a' +and is available at <http://unixtools.sourceforge.net/>. Notable +changes include: + +- Support for multibyte characters was enabled again. +- Support for traditional extended regular expression syntax was added. +- Fix: With REG_ICASE, [B-z] matches 'A', 'a', and '[' according to + POSIX.2. +- Some speed improvements. + + Gunnar Ritter 9/22/03 diff --git a/libuxre/_collelem.c b/libuxre/_collelem.c @@ -0,0 +1,119 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)_collelem.c 1.4 (gritter) 10/18/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. 
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include "colldata.h" +#include <stddef.h> + +#define CCE(p) ((const CollElem *)(p)) +#define CCM(p) ((const CollMult *)(p)) + +LIBUXRE_STATIC const CollElem * +libuxre_collelem(struct lc_collate *col, CollElem *spare, wchar_t wc) +{ + const char *tbl; + size_t hi, lo, cur; + const CollMult *cmp; + const CollElem *cep; + long diff; + int sz; + + /* + * ELEM_ENCODED is returned when the collation is entirely + * based on the encoded value of the character. + */ + if (col == 0 || col->flags & CHF_ENCODED + || (tbl = (const char *)col->maintbl) == 0) + { + return ELEM_ENCODED; + } + if ((wuchar_type)wc <= UCHAR_MAX) + { + indexed:; + cep = CCE(&tbl[(wuchar_type)wc * col->elemsize]); + if (cep->weight[0] == WGHT_SPECIAL) + return ELEM_BADCHAR; + return cep; + } + if (col->flags & CHF_INDEXED) + { + if ((wuchar_type)wc >= col->nmain) + return ELEM_BADCHAR; + goto indexed; + } + /* + * Binary search for a match. Could speed up the search if + * some interpolation was used, but keep it simple for now. + * Note that this is actually a table of CollMult's. + * + * To save space in the file, sequences of similar elements + * are sometimes compressed into a single CollMult that + * describes many entries. This is denoted by a subnbeg + * with the SUBN_SPECIAL bit set. The rest of the bits give + * the range covered by this entry. 
+ */ + sz = col->elemsize + (sizeof(CollMult) - sizeof(CollElem)); + tbl += (1 + UCHAR_MAX) * col->elemsize; + lo = 0; + hi = col->nmain - UCHAR_MAX; + while (lo < hi) + { + if ((cur = (hi + lo) >> 1) < lo) /* hi+lo overflowed */ + cur |= ~(~(size_t)0 >> 1); /* lost high order bit */ + cmp = CCM(&tbl[cur * sz]); + if ((diff = wc - cmp->ch) < 0) + hi = cur; + else if (cmp->elem.subnbeg & SUBN_SPECIAL) + { + if (diff > (long)(cmp->elem.subnbeg & ~SUBN_SPECIAL)) + lo = cur + 1; + else /* create an entry from the sequence in spare */ + { + spare->multbeg = cmp->elem.multbeg; + spare->subnbeg = 0; + spare->weight[0] = cmp->elem.weight[0] + diff; + for (lo = 1; lo < col->nweight; lo++) + { + wuchar_type w; + + if ((w = cmp->elem.weight[lo]) + == WGHT_SPECIAL) + { + w = spare->weight[0]; + } + spare->weight[lo] = w; + } + return spare; + } + } + else if (diff == 0) + return &cmp->elem; + else + lo = cur + 1; + } + return ELEM_BADCHAR; +} diff --git a/libuxre/_collmult.c b/libuxre/_collmult.c @@ -0,0 +1,55 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)_collmult.c 1.4 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. 
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include "colldata.h" +#include <stddef.h> + +#define CCM(p) ((const CollMult *)(p)) + +LIBUXRE_STATIC const CollElem * +libuxre_collmult(struct lc_collate *col, const CollElem *cep, wchar_t wc) +{ + const char *tbl; + size_t sz; + w_type ch; + + if (col == 0 || cep->multbeg == 0 + || (tbl = (const char *)col->multtbl) == 0) + { + return ELEM_BADCHAR; + } + sz = col->elemsize + (sizeof(CollMult) - sizeof(CollElem)); + tbl += sz * cep->multbeg; + while ((ch = CCM(tbl)->ch) != wc) + { + if (ch == 0) + return ELEM_BADCHAR; /* end of list */ + tbl += sz; + } + return &CCM(tbl)->elem; +} diff --git a/libuxre/bracket.c b/libuxre/bracket.c @@ -0,0 +1,829 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)bracket.c 1.14 (gritter) 10/18/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include "re.h" + +/* +* Build and match the [...] part of REs. 
+* +* In general, each compiled bracket construct holds a set of mapped +* wide character values and a set of character classifications. +* The mapping applied (when the current LC_COLLATE is not CHF_ENCODED) +* is the "basic" weight (cep->weight[0]); otherwise the actual wide +* character is used. +* +* To support simplified range handling, this code assumes that a w_type, +* a signed integer type, can hold all valid basic weight values (as well +* as all wide character values for CHF_ENCODED locales) and that these +* are all positive. Negative values indicate error conditions (BKT_*); +* zero (which must be the same as WGHT_IGNORE) indicates success, but +* that the item installed is not a range endpoint. +*/ + +static int +addwide(Bracket *bp, wchar_t ord) +{ + unsigned int nw; + + if ((nw = bp->nwide) < NWIDE) + bp->wide[nw] = ord; + else + { + if (nw % NWIDE == 0 && (bp->exwide = + realloc(bp->exwide, nw * sizeof(wchar_t))) == 0) + { + return BKT_ESPACE; + } + nw -= NWIDE; + bp->exwide[nw] = ord; + } + bp->nwide++; + return 0; +} + +#if USHRT_MAX == 65535 /* have 16 bits */ +#define PLIND(n) ((n) >> 4) +#define PLBIT(n) (1 << ((n) & 0xf)) +#else +#define PLIND(n) ((n) / CHAR_BIT) +#define PLBIT(n) (1 << ((n) % CHAR_BIT)) +#endif + +#define RANGE ((wchar_t)'-') /* separates wide chars in ranges */ + +static int +addrange(Bracket *bp, wchar_t ord, w_type prev) +{ + int ret; + + if (prev > 0 && prev != ord) /* try for range */ + { + if (prev > ord) + { + if (bp->flags & BKT_ODDRANGE) /* prev only - done */ + return 0; + else if ((bp->flags & BKT_BADRANGE) == 0) + return BKT_ERANGE; + } + else + { + if (++prev <= UCHAR_MAX) /* "prev" already there */ + { + do + { + bp->byte[PLIND(prev)] |= PLBIT(prev); + if (prev == ord) + return 0; + } while (++prev <= UCHAR_MAX); + } + if ((ret = addwide(bp, prev)) != 0) + return ret; + if (++prev > ord) + return 0; + if (prev < ord && (ret = addwide(bp, RANGE)) != 0) + return ret; + return addwide(bp, ord); + } + } + if (ord 
<= UCHAR_MAX) + { + bp->byte[PLIND(ord)] |= PLBIT(ord); + return 0; + } + if (prev == ord) /* don't bother */ + return 0; + return addwide(bp, ord); +} + +static w_type +place(Bracket *bp, wchar_t wc, w_type prev, int mb_cur_max) +{ + const CollElem *cep; + CollElem spare; + int ret; + + if ((cep = libuxre_collelem(bp->col, &spare, wc)) != ELEM_ENCODED) + { + if (cep == ELEM_BADCHAR) + return BKT_BADCHAR; + wc = cep->weight[0]; + } + if ((ret = addrange(bp, wc, prev)) != 0) + return ret; + return wc; +} + +#ifndef CHARCLASS_NAME_MAX +# define CHARCLASS_NAME_MAX 127 +#endif + +static w_type +chcls(Bracket *bp, const unsigned char *s, int n) +{ + char clsstr[CHARCLASS_NAME_MAX + 1]; + unsigned int nt; + wctype_t wct; + + if (n > CHARCLASS_NAME_MAX) + return BKT_ECTYPE; + (void)memcpy(clsstr, s, n); + clsstr[n] = '\0'; + if ((wct = wctype(clsstr)) == 0) + return BKT_ECTYPE; + if ((nt = bp->ntype) < NTYPE) + bp->type[nt] = wct; + else + { + if (nt % NTYPE == 0 && (bp->extype = + realloc(bp->extype, nt * sizeof(wctype_t))) == 0) + { + return BKT_ESPACE; + } + nt -= NTYPE; + bp->extype[nt] = wct; + } + bp->ntype++; + return 0; /* cannot be end point of a range */ +} + + /* + * The purpose of mcce() and its Mcce structure is to locate + * the next full collation element from "wc" and "s". It is + * called both at compile and execute time. These two differ + * primarily in that at compile time there is an exact number + * of bytes to be consumed, while at execute time the longest + * valid collation element is to be found. + * + * When BKT_ONECASE is set, MCCEs become particularly messy. + * There is no guarantee that all possible combinations of + * upper/lower case are defined as MCCEs. Thus, this code + * tries both lower- and uppercase (in that order) for each + * character than might be part of an MCCE. 
+ */ + +typedef struct +{ + const unsigned char *max; /* restriction by caller */ + const unsigned char *aft; /* longest successful */ + Bracket *bp; /* readonly */ + struct lc_collate *col; /* readonly */ + const CollElem *cep; /* entry matching longest */ + wchar_t ch; /* initial character (if any) */ + w_type wc; /* character matching "aft" */ +} Mcce; + +static int +mcce(Mcce *mcp, const CollElem *cep, const unsigned char *s, int mb_cur_max, + int compile_time) +{ + const CollElem *nxt; + CollElem spare; + w_type ch, wc; + int i; + + /* + * Get next character. + */ + if ((wc = mcp->ch) != '\0') + { + mcp->ch = '\0'; + } + else if (ISONEBYTE(wc = *s++)) + { + if (wc == '\0') + return 0; + } + else if ((i = libuxre_mb2wc(&wc, s)) > 0) + { + s += i; + if (mcp->max != 0 && s > mcp->max) + return 0; + } + else if (i < 0) + return BKT_ILLSEQ; + /* + * Try out the this character as part of an MCCE. + * If BKT_ONECASE is set, this code tries both the lower- and + * uppercase version, continuing if it matches so far. + */ + ch = wc; + if (mcp->bp->flags & BKT_ONECASE) + { + if ((wc = to_lower(wc)) == ch) + ch = to_upper(wc); + } + for (;;) /* at most twice */ + { + if (cep == ELEM_BADCHAR) /* first character */ + { + if ((nxt = libuxre_collelem(mcp->col, &spare, wc)) + == ELEM_ENCODED + || (mcp->col->flags & CHF_MULTICH) == 0 + || s == mcp->max) + { + mcp->aft = s; + mcp->cep = nxt; + mcp->wc = wc; + break; + } + } + else + { + nxt = libuxre_collmult(mcp->col, cep, wc); + } + if (nxt != ELEM_BADCHAR) + { + /* + * Okay so far. Record this collating element + * if it's really one (not WGHT_IGNORE) and + * we've reached a new high point or it's the + * first match. + * + * If there's a possibility for more, call mcce() + * recursively for the subsequent characters. 
+ */ + if (nxt->weight[0] != WGHT_IGNORE + && (mcp->aft < s || mcp->cep == ELEM_BADCHAR)) + { + mcp->aft = s; + mcp->cep = nxt; + mcp->wc = wc; + } + if (nxt->multbeg != 0 + && (mcp->max == 0 || s < mcp->max)) + { + if ((i = mcce(mcp, nxt, s, mb_cur_max, + compile_time)) != 0) + return i; + } + } + if (wc == ch) + break; + wc = ch; + } + return 0; +} + +static w_type +eqcls(Bracket *bp, const unsigned char *s, int n, w_type prev, int mb_cur_max) +{ + w_type last; + Mcce mcbuf; + int err; + + mcbuf.max = &s[n]; + mcbuf.aft = &s[0]; + mcbuf.bp = bp; + mcbuf.col = bp->col; + mcbuf.cep = ELEM_BADCHAR; + mcbuf.ch = '\0'; + if ((err = mcce(&mcbuf, ELEM_BADCHAR, s, mb_cur_max, 1)) != 0) + return err; + if (mcbuf.cep == ELEM_BADCHAR || mcbuf.aft != mcbuf.max) + return BKT_EEQUIV; + last = mcbuf.wc; + if (mcbuf.cep != ELEM_ENCODED && mcbuf.col->nweight > 1) + { + const CollElem *cep; + + /* + * The first and last weight[0] values for equivalence + * classes are stuffed into the terminator for the + * multiple character lists. If these values are + * scattered (elements that are not part of this + * equivalence class have weight[0] values between the + * two end points), then SUBN_SPECIAL is placed in + * this terminator. Note that weight[1] of the + * terminator must be other than WGHT_IGNORE, too. + */ + last = mcbuf.cep->weight[0]; + if ((cep = libuxre_collmult(bp->col, mcbuf.cep, 0)) + != ELEM_BADCHAR + && cep->weight[1] != WGHT_IGNORE) + { + last = cep->weight[1]; + if (cep->subnbeg == SUBN_SPECIAL) + { + unsigned int nq; + + /* + * Permit ranges up to the first and + * after the last. + */ + if (prev > 0 && prev != cep->weight[0] + && (prev = addrange(bp, + cep->weight[0], prev)) != 0) + { + return prev; + } + /* + * Record the equivalence class by storing + * the primary weight. 
+ */ + if ((nq = bp->nquiv) < NQUIV) + bp->quiv[nq] = mcbuf.cep->weight[1]; + else + { + if (nq % NQUIV == 0 && (bp->exquiv = + realloc(bp->exquiv, + nq * sizeof(wuchar_type))) + == 0) + { + return REG_ESPACE; + } + nq -= NQUIV; + bp->exquiv[nq] = mcbuf.cep->weight[1]; + } + bp->nquiv++; + return last; + } + mcbuf.cep = cep; + } + mcbuf.wc = mcbuf.cep->weight[0]; + } + /* + * Determine range, if any, to install. + * + * If there's a pending low (prev > 0), then try to use it. + * + * Otherwise, try to use mcbuf.wc as the low end of the range. + * Since addrange() assumes that the low point has already been + * placed, we try to fool it by using a prev of one less than + * mcbuf.wc. But, if that value would not look like a valid + * low point of a range, we have to explicitly place mcbuf.wc. + */ + if (prev <= 0 && (prev = mcbuf.wc - 1) <= 0) + { + if ((prev = addrange(bp, mcbuf.wc, 0)) != 0) + return prev; + } + if ((mcbuf.wc = addrange(bp, last, prev)) != 0) + return mcbuf.wc; + return last; +} + +static w_type +clsym(Bracket *bp, const unsigned char *s, int n, w_type prev, int mb_cur_max) +{ + Mcce mcbuf; + int err; + + mcbuf.max = &s[n]; + mcbuf.aft = &s[0]; + mcbuf.bp = bp; + mcbuf.col = bp->col; + mcbuf.cep = ELEM_BADCHAR; + mcbuf.ch = '\0'; + if ((err = mcce(&mcbuf, ELEM_BADCHAR, s, mb_cur_max, 1)) != 0) + return err; + if (mcbuf.cep == ELEM_BADCHAR || mcbuf.aft != mcbuf.max) + return BKT_ECOLLATE; + if (mcbuf.cep != ELEM_ENCODED) + mcbuf.wc = mcbuf.cep->weight[0]; + if ((err = addrange(bp, mcbuf.wc, prev)) != 0) + return err; + return mcbuf.wc; +} + + /* + * Scans the rest of a bracket construction within a regular + * expression and fills in a description for it. + * The leading [ and the optional set complement indicator + * were handled already by the caller. + * Returns: + * <0 error (a BKT_* value) + * >0 success; equals how many bytes were scanned. 
+ */ +LIBUXRE_STATIC int +libuxre_bktmbcomp(Bracket *bp, const unsigned char *pat0, + int flags, int mb_cur_max) +{ + static const Bracket zero = {0}; + const unsigned char *pat = pat0; + struct lc_collate *savecol; + w_type n, wc, prev = 0; + + /* + * Set represented set to empty. Easiest to copy an empty + * version over the caller's, (re)setting col and flags. + */ + savecol = bp->col; + *bp = zero; + bp->col = savecol; + bp->flags = flags + & (BKT_NEGATED | BKT_ONECASE | BKT_NOTNL | BKT_BADRANGE | + BKT_ODDRANGE); + /* + * Handle optional "empty" brackets; typically only used + * in combination with BKT_QUOTE or BKT_ESCAPE. + */ + if ((wc = *pat) == ']' && (flags & BKT_EMPTY) != 0) + return 1; + /* + * Populate *bp. + */ + for (;; prev = n) + { + switch (wc) + { + case '\0': + ebrack:; + n = BKT_EBRACK; + goto err; + case '\n': + if (flags & BKT_NLBAD) + goto ebrack; + goto regular; + case '/': + if (flags & BKT_SLASHBAD) + goto ebrack; + goto regular; + case '\\': + if ((flags & (BKT_ESCAPE | BKT_QUOTE + | BKT_ESCNL | BKT_ESCSEQ)) == 0) + { + goto regular; + } + switch (wc = *++pat) + { + default: + noesc:; + if ((flags & BKT_ESCAPE) == 0) + { + wc = '\\'; + pat--; + } + break; + case '\\': + case ']': + case '-': + case '^': + if ((flags & BKT_QUOTE) == 0) + goto noesc; + break; + case 'a': + if ((flags & BKT_ESCSEQ) == 0 || + (flags & BKT_OLDESC)) + goto noesc; + wc = '\a'; + break; + case 'b': + if ((flags & BKT_ESCSEQ) == 0) + goto noesc; + wc = '\b'; + break; + case 'f': + if ((flags & BKT_ESCSEQ) == 0) + goto noesc; + wc = '\f'; + break; + case 'n': + if ((flags & (BKT_ESCSEQ | BKT_ESCNL)) == 0) + goto noesc; + wc = '\n'; + break; + case 'r': + if ((flags & BKT_ESCSEQ) == 0) + goto noesc; + wc = '\r'; + break; + case 't': + if ((flags & BKT_ESCSEQ) == 0) + goto noesc; + wc = '\t'; + break; + case 'v': + if ((flags & BKT_ESCSEQ) == 0 || + (flags & BKT_OLDESC)) + goto noesc; + wc = '\v'; + break; + case 'x': + if ((flags & BKT_ESCSEQ) == 0 || + (flags & 
BKT_OLDESC)) + goto noesc; + if (!isxdigit(wc = *++pat)) + { + pat--; + goto noesc; + } + /* + * Take as many hex digits as possible, + * ignoring overflows. + * Any positive result is okay. + */ + n = 0; + do + { + if (isdigit(wc)) + wc -= '0'; + else if (isupper(wc)) + wc -= 'A' + 10; + else + wc -= 'a' + 10; + n <<= 4; + n |= wc; + } while (isxdigit(wc = *++pat)); + pat--; + if ((wc = n) <= 0) + { + n = BKT_BADESC; + goto err; + } + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if ((flags & BKT_ESCSEQ) == 0 || + (flags & BKT_OLDESC)) + goto noesc; + /* + * For compatibility (w/awk), + * permit "octal" 8 and 9. + */ + n = wc - '0'; + if ((wc = *++pat) >= '0' && wc <= '9') + { + n <<= 3; + n += wc - '0'; + if ((wc = *++pat) >= '0' && wc <= '9') + { + n <<= 3; + n += wc - '0'; + } + } + pat--; + if ((wc = n) <= 0) + { + n = BKT_BADESC; + goto err; + } + break; + } + goto regular; + case '[': + if (((wc = *++pat) == ':' || wc == '=' || wc == '.') && + (flags & BKT_NOI18N) == 0) + { + n = 0; + while (*++pat != wc || pat[1] != ']') + { + if (*pat == '\0') + { + badpat:; + n = BKT_BADPAT; + goto err; + } + else if (*pat == '/') + { + if (flags & BKT_SLASHBAD) + goto badpat; + } + else if (*pat == '\n') + { + if (flags & BKT_NLBAD) + goto badpat; + } + n++; + } + if (n == 0) + { + n = BKT_EMPTYSUBBKT; + goto err; + } + if (wc == ':') + n = chcls(bp, &pat[-n], n); + else if (wc == '=') + n = eqcls(bp, &pat[-n], n, prev, + mb_cur_max); + else /* wc == '.' 
*/ + n = clsym(bp, &pat[-n], n, prev, + mb_cur_max); + pat++; + break; + } + wc = '['; + pat--; + goto regular; + default: + if (!ISONEBYTE(wc) && + (n = libuxre_mb2wc(&wc, pat + 1)) > 0) + pat += n; + regular:; + n = place(bp, wc, prev, mb_cur_max); + break; + } + if (n < 0) { + n = BKT_ILLSEQ; + goto err; + } + if ((wc = *++pat) == ']') + break; + if (wc == '-' && n != 0) + { + if (prev == 0 || (flags & BKT_SEPRANGE) == 0) + { + if ((wc = *++pat) != ']') + continue; /* valid range */ + wc = '-'; + pat--; + } + } + n = 0; /* no range this time */ + } + return pat - pat0 + 1; +err:; + libuxre_bktfree(bp); + return n; +} + +LIBUXRE_STATIC void +libuxre_bktfree(Bracket *bp) +{ + if (bp->extype != 0) + free(bp->extype); + if (bp->exquiv != 0) + free(bp->exquiv); + if (bp->exwide != 0) + free(bp->exwide); +} + +LIBUXRE_STATIC int +libuxre_bktmbexec(Bracket *bp, wchar_t wc, + const unsigned char *str, int mb_cur_max) +{ + unsigned int i; + wchar_t lc, uc; + Mcce mcbuf; + + mcbuf.aft = str; /* in case of match in character classes */ + mcbuf.ch = wc; + /* + * First: check the single wc against any character classes. + * Since multiple character collating elements are not part + * of this world, they don't apply here. + */ + if ((i = bp->ntype) != 0) + { + wctype_t *wctp = &bp->type[0]; + + if (bp->flags & BKT_ONECASE) + { + if ((wc = to_lower(wc)) == mcbuf.ch) + mcbuf.ch = to_upper(wc); + } + for (;;) + { + if (iswctype(mb_cur_max==1?btowc(wc):wc, *wctp)) + goto match; + if (wc != mcbuf.ch && + iswctype(mb_cur_max==1?btowc(mcbuf.ch):mcbuf.ch, + *wctp)) + goto match; + if (--i == 0) + break; + if (++wctp == &bp->type[NTYPE]) + wctp = &bp->extype[0]; + } + } + /* + * The main match is determined by the weight[0] value + * of the character (or characters, if the input can be + * taken as a multiple character collating element). 
+ */ + mcbuf.max = 0; + mcbuf.bp = bp; + mcbuf.col = bp->col; + mcbuf.cep = ELEM_BADCHAR; + mcce(&mcbuf, ELEM_BADCHAR, str, mb_cur_max, 0); + if (mcbuf.cep == ELEM_BADCHAR) + return -1; /* never matches */ + if (mcbuf.cep != ELEM_ENCODED) + mcbuf.wc = mcbuf.cep->weight[0]; + /* + * POSIX.2 demands that both a character and its case counterpart + * can match if REG_ICASE is set. This means that [B-z] matches + * 'A', 'a', and '['. + */ + if (bp->flags & BKT_ONECASE) + { + lc = to_lower(mcbuf.wc); + uc = to_upper(mcbuf.wc); + } + else + lc = uc = mcbuf.wc; + /* + * See if it's in the set. Note that the list of true wide + * character values has explicit ranges. + */ + if (mcbuf.wc <= UCHAR_MAX) + { + if (bp->byte[PLIND(lc)] & PLBIT(lc)) + goto match; + if (lc != uc && (bp->byte[PLIND(uc)] & PLBIT(uc))) + goto match; + } + else if ((i = bp->nwide) != 0) + { + wchar_t *wcp = &bp->wide[0]; + long lcmp, ucmp; + + for (;;) + { + if ((lcmp = lc - *wcp) == 0) + goto match; + ucmp = uc - *wcp; + if (lc != uc && ucmp == 0) + goto match; + if (--i == 0) + break; + if (++wcp == &bp->wide[NWIDE]) + wcp = &bp->exwide[0]; + if (*wcp == RANGE) + { + if (++wcp == &bp->wide[NWIDE]) + wcp = &bp->exwide[0]; + if (lcmp > 0 && lc <= *wcp) + goto match; + if (lc != uc && ucmp > 0 && uc < *wcp) + goto match; + if ((i -= 2) == 0) + break; + if (++wcp == &bp->wide[NWIDE]) + wcp = &bp->exwide[0]; + } + } + } + /* + * The last chance for a match is if an equivalence class + * was specified for which the primary weights are scattered + * through the weight[0]s. + */ + if ((i = bp->nquiv) != 0 && mcbuf.cep != ELEM_ENCODED) + { + wuchar_type *wucp = &bp->quiv[0]; + + mcbuf.wc = mcbuf.cep->weight[1]; + for (;;) + { + if (mcbuf.wc == *wucp) + goto match; + if (--i == 0) + break; + if (++wucp == &bp->quiv[NQUIV]) + wucp = &bp->exquiv[0]; + } + } + /* + * Only here when no match against the set was found. + * One final special case w/r/t newline. 
+ */ + if (bp->flags & BKT_NEGATED) + { + if (wc != '\n' || (bp->flags & BKT_NOTNL) == 0) + return mcbuf.aft - str; + } + return -1; +match:; + /* + * Only here when a match against the described set is found. + */ + if (bp->flags & BKT_NEGATED) + return -1; + return mcbuf.aft - str; +} diff --git a/libuxre/colldata.h b/libuxre/colldata.h @@ -0,0 +1,226 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)colldata.h 1.5 (gritter) 5/1/04 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef LIBUXRE_COLLDATA_H +#define LIBUXRE_COLLDATA_H + +typedef struct +{ + long coll_offst; /* offset to xnd table */ + long sub_cnt; /* length of subnd table */ + long sub_offst; /* offset to subnd table */ + long str_offst; /* offset to strings for subnd table */ + long flags; /* nonzero if reg.exp. 
used */ +} hd; + +typedef struct +{ + unsigned char ch; /* character or number of followers */ + unsigned char pwt; /* primary weight */ + unsigned char swt; /* secondary weight */ + unsigned char ns; /* index of follower state list */ +} xnd; + +typedef struct +{ + char *exp; /* expression to be replaced */ + long explen; /* length of expression */ + char *repl; /* replacement string */ +} subnd; + +/*----------------------------------*/ + +#include <wcharm.h> +#include <limits.h> +/* #include <stdlock.h> */ + +/* +* Structure of a collation file: +* 1. CollHead (maintbl is 0 if CHF_ENCODED) +* if !CHF_ENCODED then +* 2. CollElem[bytes] (256 for 8 bit bytes) +* 3. if CHF_INDEXED then +* CollElem[wides] (nmain-256 for 8 bit bytes) +* else +* CollMult[wides] +* 4. CollMult[*] (none if multtbl is 0) +* 5. wuchar_type[*] (none if repltbl is 0) +* 6. CollSubn[*] (none if subntbl is 0) +* 7. strings (first is pathname for .so if CHF_DYNAMIC) +* +* The actual location of parts 2 through 7 is not important. +* +* The main table is in encoded value order. +* +* All indeces/offsets must be nonzero to be effective; zero is reserved +* to indicate no-such-entry. This implies either that an unused initial +* entry is placed in each of (4) through (7), or that the "start offset" +* given by the header is artificially pushed back by an entry size. +* +* Note that if CHF_ENCODED is not set, then nweight must be positive. +* +* If an element can begin a multiple character element, it contains a +* nonzero multbeg which is the initial index into (4) for its list; +* the list is terminated by a CollMult with a ch of zero. +* +* If there are elements with the same primary weight (weight[1]), then +* for each such element, it must have a CollMult list. The CollMult +* that terminates the list (ch==0) notes the lowest and highest basic +* weights for those elements with that same primary weight value +* respectively in weight[0] and weight[1]. 
If there are some basic +* weights between these values that do not have the same primary +* weight--are not in the equivalence class--then the terminator also +* has a SUBN_SPECIAL mark. Note that this list terminator should be +* shared when the elements are not multiple character collating +* elements because they wouldn't otherwise have a CollMult list. +* +* WGHT_IGNORE is used to denote ignored collating elements for a +* particular collation ordering pass. All main table entries other +* than for '\0' will have a non-WGHT_IGNORE weight[0]. However, it is +* possible for a CollMult entries from (4) to have a WGHT_IGNORE +* weight[0]: If, for example, "xyz" is a multiple character collating +* element, but "xy" is not, then the CollMult for "y" will have a +* WGHT_IGNORE weight[0]. Also, WGHT_IGNORE is used to terminate each +* list of replacement weights. +* +* Within (3), it is possible to describe a sequence of unremarkable +* collating elements with a single CollMult entry. If the SUBN_SPECIAL +* bit is set, the rest of subnbeg represents the number of collating +* elements covered by this entry. The weight[0] values are determined +* by adding the difference between the encoded value and the entry's ch +* value to the entry's weight[0]. This value is then substituted for +* any weight[n], n>0 that has only the WGHT_SPECIAL bit set. libuxre_collelem() +* hides any match to such an entry by filling in a "spare" CollElem. +* +* If there are substitution strings, then for each character that begins +* a string, it has a nonzero subnbeg which is similarly the initial +* index into (6). The indeces in (6) refer to offsets within (7). +*/ + +#define TOPBIT(t) (((t)1) << (sizeof(t) * CHAR_BIT - 1)) + +#define CHF_ENCODED 0x1 /* collation by encoded values only */ +#define CHF_INDEXED 0x2 /* main table indexed by encoded values */ +#define CHF_MULTICH 0x4 /* a multiple char. coll. elem. 
exists */ +#define CHF_DYNAMIC 0x8 /* shared object has collation functions */ + +#define CWF_BACKWARD 0x1 /* reversed ordering for this weight */ +#define CWF_POSITION 0x2 /* weight takes position into account */ + +#define CLVERS 1 /* most recent version */ + +#define WGHT_IGNORE 0 /* ignore this collating element */ +#define WGHT_SPECIAL TOPBIT(wuchar_type) +#define SUBN_SPECIAL TOPBIT(unsigned short) + +#ifndef COLL_WEIGHTS_MAX +#define COLL_WEIGHTS_MAX 1 +#endif + +typedef struct +{ + unsigned long maintbl; /* start of main table */ + unsigned long multtbl; /* start of multi-char table */ + unsigned long repltbl; /* start of replacement weights */ + unsigned long subntbl; /* start of substitutions */ + unsigned long strstbl; /* start of sub. strings */ + unsigned long nmain; /* # entries in main table */ + unsigned short flags; /* CHF_* bits */ + unsigned short version; /* handle future changes */ + unsigned char elemsize; /* # bytes/element (w/padding) */ + unsigned char nweight; /* # weights/element */ + unsigned char order[COLL_WEIGHTS_MAX]; /* CWF_* bits/weight */ +} CollHead; + +typedef struct +{ + unsigned short multbeg; /* start of multi-chars */ + unsigned short subnbeg; /* start of substitutions */ + wuchar_type weight[COLL_WEIGHTS_MAX]; +} CollElem; + +typedef struct +{ + wchar_t ch; /* "this" character (of sequence) */ + CollElem elem; /* its full information */ +} CollMult; + +typedef struct +{ + unsigned short strbeg; /* start of match string */ + unsigned short length; /* length of match string */ + unsigned short repbeg; /* start of replacement */ +} CollSubn; + +struct lc_collate +{ + const unsigned char *strstbl; + const wuchar_type *repltbl; + const CollElem *maintbl; + const CollMult *multtbl; + const CollSubn *subntbl; +#ifdef DSHLIB + void *handle; + void (*done)(struct lc_collate *); + int (*strc)(struct lc_collate *, const char *, const char *); + int (*wcsc)(struct lc_collate *, const wchar_t *, const wchar_t *); + size_t (*strx)(struct 
lc_collate *, char *, const char *, size_t); + size_t (*wcsx)(struct lc_collate *, wchar_t *, const wchar_t *, size_t); +#endif + const char *mapobj; + size_t mapsize; + unsigned long nmain; + short nuse; + unsigned short flags; + unsigned char elemsize; + unsigned char nweight; + unsigned char order[COLL_WEIGHTS_MAX]; +}; + +#define ELEM_BADCHAR ((CollElem *)0) +#define ELEM_ENCODED ((CollElem *)-1) + +/* +LIBUXRE_STATIC int libuxre_old_collate(struct lc_collate *); +LIBUXRE_STATIC int libuxre_strqcoll(struct lc_collate *, const char *, + const char *); +LIBUXRE_STATIC int libuxre_wcsqcoll(struct lc_collate *, const wchar_t *, + const wchar_t *); +*/ +extern struct lc_collate *libuxre_lc_collate(struct lc_collate *); +LIBUXRE_STATIC const CollElem *libuxre_collelem(struct lc_collate *, + CollElem *, wchar_t); +LIBUXRE_STATIC const CollElem *libuxre_collmult(struct lc_collate *, + const CollElem *, wchar_t); +/* +LIBUXRE_STATIC const CollElem *libuxre_collmbs(struct lc_collate *, + CollElem *, const unsigned char **); +LIBUXRE_STATIC const CollElem *libuxre_collwcs(struct lc_collate *, + CollElem *, const wchar_t **); +*/ + +#endif /* !LIBUXRE_COLLDATA_H */ diff --git a/libuxre/depsinc.mk b/libuxre/depsinc.mk @@ -0,0 +1,2 @@ +DEPS_CFLAGS = $DEPS_CFLAGS -I$libuxre_DEPDIR +DEPS_LDFLAGS = $DEPS_LDFLAGS -L$libuxre_DEPDIR -luxre diff --git a/libuxre/mkfile b/libuxre/mkfile @@ -0,0 +1,19 @@ +LIB = libuxre.a +LOBJ = onefile.o regfree.o regerror.o +LOCAL_CFLAGS = -I. 
+ +<$mkbuild/mk.default + +_collelem.o: colldata.h re.h regex.h wcharm.h +_collmult.o: colldata.h re.h regex.h wcharm.h +bracket.o: colldata.h re.h regex.h wcharm.h +regcomp.o: colldata.h re.h regex.h wcharm.h +regdfa.o: colldata.h regdfa.h re.h regex.h wcharm.h +regerror.o: colldata.h re.h regex.h wcharm.h +regexec.o: colldata.h re.h regex.h wcharm.h +regfree.o: colldata.h re.h regex.h wcharm.h +regnfa.o: colldata.h re.h regex.h wcharm.h +regparse.o: colldata.h re.h regex.h wcharm.h +stubs.o: colldata.h wcharm.h +onefile.o: _collelem.c _collmult.c bracket.c regcomp.c regdfa.c regexec.c +onefile.o: regfree.c regnfa.c regparse.c stubs.c diff --git a/libuxre/onefile.c b/libuxre/onefile.c @@ -0,0 +1,38 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)onefile.c 1.1 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. 
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define LIBUXRE_STATIC static + +#include "_collelem.c" +#include "_collmult.c" +#include "stubs.c" +#include "bracket.c" +#include "regdfa.c" +#include "regnfa.c" +#include "regparse.c" +#include "regcomp.c" +#include "regexec.c" diff --git a/libuxre/re.h b/libuxre/re.h @@ -0,0 +1,228 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)re.h 1.15 (gritter) 2/6/05 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. 
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef LIBUXRE_RE_H +#define LIBUXRE_RE_H + + /* + * Maps safe external tag to internal one + */ +#define re_coll_ lc_collate /* <regex.h> */ +/* #define __fnm_collate lc_collate */ /* <fnmatch.h> */ + +#include <limits.h> +#include <regex.h> +/* #include <fnmatch.h> */ +#include <colldata.h> + +#define NBSHT (sizeof(unsigned short) * CHAR_BIT) +#define NBYTE (((1 << CHAR_BIT) + NBSHT - 1) / NBSHT) +#define NTYPE 4 +#define NWIDE 32 +#define NQUIV 4 + +typedef struct +{ + struct lc_collate *col; /* only member set by caller */ + wctype_t *extype; + wuchar_type *exquiv; + wchar_t *exwide; + wctype_t type[NTYPE]; + wuchar_type quiv[NQUIV]; + wchar_t wide[NWIDE]; + unsigned short byte[NBYTE]; + unsigned short ntype; + unsigned short nquiv; + unsigned short nwide; + unsigned int flags; +} Bracket; + +#define BKT_NEGATED 0x001 /* complemented set */ +#define BKT_ONECASE 0x002 /* uppercase same as lowercase */ +#define BKT_NOTNL 0x004 /* do not match newline when BKT_NEGATED */ +#define BKT_BADRANGE 0x008 /* accept [m-a] ranges as [ma] */ +#define BKT_SEPRANGE 0x010 /* disallow [a-m-z] style ranges */ +#define BKT_NLBAD 0x020 /* newline disallowed */ +#define BKT_SLASHBAD 0x040 /* slash disallowed (for pathnames) */ +#define BKT_EMPTY 0x080 /* take leading ] is end (empty set) */ +#define BKT_ESCAPE 0x100 /* allow \ as quote for next anything */ +#define BKT_QUOTE 0x200 /* allow \ as quote for \\, \^, \- or \] */ +#define BKT_ESCNL 0x400 /* take \n as the newline character */ +#define BKT_ESCSEQ 0x800 /* otherwise, take \ as in C escapes */ +#define BKT_ODDRANGE 0x1000 /* oawk oddity: [m-a] means [m] */ +#define BKT_NOI18N 0x2000 /* disable [::] [==] [..] */ +#define BKT_OLDESC 0x4000 /* enable \b \f \n \r \t only */ + + /* + * These error returns for libuxre_bktmbcomp() are directly tied to + * the error returns for regcomp() for convenience. 
+ */ +#define BKT_BADPAT (-REG_BADPAT) +#define BKT_ECOLLATE (-REG_ECOLLATE) +#define BKT_ECTYPE (-REG_ECTYPE) +#define BKT_EEQUIV (-REG_EEQUIV) +#define BKT_BADCHAR (-REG_EBKTCHAR) +#define BKT_EBRACK (-REG_EBRACK) +#define BKT_EMPTYSUBBKT (-REG_EMPTYSUBBKT) +#define BKT_ERANGE (-REG_ERANGE) +#define BKT_ESPACE (-REG_ESPACE) +#define BKT_BADESC (-REG_BADESC) +#define BKT_ILLSEQ (-REG_ILLSEQ) + + /* + * These must be distinct from the flags in <fnmatch.h>. + */ +#define FNM_COLLATE 0x2000 /* have collation information */ +#define FNM_CURRENT 0x4000 /* have full-sized fnm_t structure */ + + /* + * These must be distinct from the flags in <regex.h>. + */ +#define REG_NFA 0x20000000 +#define REG_DFA 0x40000000 +#define REG_GOTBKT 0x80000000 + +#define BRACE_INF USHRT_MAX +#define BRACE_MAX 5100 /* arbitrary number < SHRT_MAX */ +#define BRACE_DFAMAX 255 /* max amount for r.e. duplication */ + +typedef union /* extra info always kept for some tokens/nodes */ +{ + Bracket *bkt; /* ROP_BKT */ + size_t sub; /* ROP_LP (ROP_RP), ROP_REF */ + unsigned short num[2]; /* ROP_BRACE: num[0]=low, num[1]=high */ +} Info; + +typedef struct /* lexical context while parsing */ +{ + Info info; + const unsigned char *pat; + unsigned char *clist; + struct lc_collate *col; + unsigned long flags; + w_type tok; + size_t maxref; + size_t nleft; + size_t nright; + size_t nclist; + int bktflags; + int err; + int mb_cur_max; +} Lex; + +typedef struct t_tree Tree; /* RE parse tree node */ +struct t_tree +{ + union + { + Tree *ptr; /* unary & binary nodes */ + size_t pos; /* position for DFA leaves */ + } left; + union + { + Tree *ptr; /* binary nodes */ + Info info; + } right; + Tree *parent; + w_type op; /* positive => char. 
to match */ +}; + +typedef struct re_dfa_ Dfa; /* DFA engine description */ +typedef struct re_nfa_ Nfa; /* NFA engine description */ + +typedef struct +{ + const unsigned char *str; + regmatch_t *match; + size_t nmatch; + unsigned long flags; + int mb_cur_max; +} Exec; + + /* + * Regular expression operators. Some only used internally. + * All are negative, to distinguish them from the regular + * "match this particular wide character" operation. + */ +#define BINARY_ROP 0x02 +#define UNARY_ROP 0x01 +#define LEAF_ROP 0x00 + +#define MAKE_ROP(k, v) (-((v) | ((k) << 4))) +#define KIND_ROP(v) ((-(v)) >> 4) + +#define ROP_OR MAKE_ROP(BINARY_ROP, 1) +#define ROP_CAT MAKE_ROP(BINARY_ROP, 2) + +#define ROP_STAR MAKE_ROP(UNARY_ROP, 1) +#define ROP_PLUS MAKE_ROP(UNARY_ROP, 2) +#define ROP_QUEST MAKE_ROP(UNARY_ROP, 3) +#define ROP_BRACE MAKE_ROP(UNARY_ROP, 4) +#define ROP_LP MAKE_ROP(UNARY_ROP, 5) +#define ROP_RP MAKE_ROP(UNARY_ROP, 6) + +#define ROP_NOP MAKE_ROP(LEAF_ROP, 1) /* temporary */ +#define ROP_BOL MAKE_ROP(LEAF_ROP, 2) /* ^ anchor */ +#define ROP_EOL MAKE_ROP(LEAF_ROP, 3) /* $ anchor */ +#define ROP_ALL MAKE_ROP(LEAF_ROP, 4) /* anything (added) */ +#define ROP_ANYCH MAKE_ROP(LEAF_ROP, 5) /* . w/\n */ +#define ROP_NOTNL MAKE_ROP(LEAF_ROP, 6) /* . w/out \n */ +#define ROP_EMPTY MAKE_ROP(LEAF_ROP, 7) /* empty string */ +#define ROP_NONE MAKE_ROP(LEAF_ROP, 8) /* match failure */ +#define ROP_BKT MAKE_ROP(LEAF_ROP, 9) /* [...] */ +#define ROP_BKTCOPY MAKE_ROP(LEAF_ROP, 10) /* [...] 
(duplicated) */ +#define ROP_LT MAKE_ROP(LEAF_ROP, 11) /* \< word begin */ +#define ROP_GT MAKE_ROP(LEAF_ROP, 12) /* \> word end */ +#define ROP_REF MAKE_ROP(LEAF_ROP, 13) /* \digit */ +#define ROP_END MAKE_ROP(LEAF_ROP, 14) /* final (added) */ + + /* + * Return values: + * libuxre_bktmbcomp() + * <0 error (see BKT_* above); >0 #bytes scanned + * libuxre_bktmbexec() + * <0 doesn't match; >=0 matches, #extra bytes scanned + */ +LIBUXRE_STATIC void libuxre_bktfree(Bracket *); +LIBUXRE_STATIC int libuxre_bktmbcomp(Bracket *, const unsigned char *, + int, int); +LIBUXRE_STATIC int libuxre_bktmbexec(Bracket *, wchar_t, + const unsigned char *, int); + +LIBUXRE_STATIC void libuxre_regdeltree(Tree *, int); +LIBUXRE_STATIC Tree *libuxre_reg1tree(w_type, Tree *); +LIBUXRE_STATIC Tree *libuxre_reg2tree(w_type, Tree *, Tree *); +LIBUXRE_STATIC Tree *libuxre_regparse(Lex *, const unsigned char *, int); + +extern void libuxre_regdeldfa(Dfa *); +LIBUXRE_STATIC int libuxre_regdfacomp(regex_t *, Tree *, Lex *); +LIBUXRE_STATIC int libuxre_regdfaexec(Dfa *, Exec *); + +extern void libuxre_regdelnfa(Nfa *); +LIBUXRE_STATIC int libuxre_regnfacomp(regex_t *, Tree *, Lex *); +LIBUXRE_STATIC int libuxre_regnfaexec(Nfa *, Exec *); +#endif /* !LIBUXRE_RE_H */ diff --git a/libuxre/regcomp.c b/libuxre/regcomp.c @@ -0,0 +1,77 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regcomp.c 1.6 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include "re.h" + +/* #pragma weak regcomp = _regcomp */ + +int +regcomp(regex_t *ep, const char *pat, int flags) +{ + Tree *tp; + Lex lex; + + if ((tp=libuxre_regparse(&lex, (const unsigned char *)pat, flags)) == 0) + goto out; + ep->re_nsub = lex.nleft; + ep->re_flags = lex.flags & ~(REG_NOTBOL | REG_NOTEOL | REG_NONEMPTY); + ep->re_col = lex.col; + ep->re_mb_cur_max = lex.mb_cur_max; + /* + * Build the engine(s). The factors determining which are built: + * 1. If the pattern built insists on an NFA, then only build NFA. + * 2. If flags include REG_NOSUB or REG_ONESUB and not (1), + * then only build DFA. + * 3. Otherwise, build both. + * Since libuxre_regdfacomp() modifies the tree and libuxre_regnfacomp() + * doesn't, libuxre_regnfacomp() must be called first, if both are to + * be called. + */ + if (ep->re_nsub != 0 && (flags & (REG_NOSUB | REG_ONESUB)) == 0 + || lex.flags & REG_NFA) + { + ep->re_flags |= REG_NFA; + if ((lex.err = libuxre_regnfacomp(ep, tp, &lex)) != 0) + goto out; + } + if ((lex.flags & REG_NFA) == 0) + { + ep->re_flags |= REG_DFA; + if ((lex.err = libuxre_regdfacomp(ep, tp, &lex)) != 0) + { + if (ep->re_flags & REG_NFA) + libuxre_regdelnfa(ep->re_nfa); + } + } +out:; + if (lex.err != 0 && lex.col != 0) + (void)libuxre_lc_collate(lex.col); + if (tp != 0) + libuxre_regdeltree(tp, lex.err); + return lex.err; +} diff --git a/libuxre/regdfa.c b/libuxre/regdfa.c @@ -0,0 +1,877 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. 
Br., Germany, November 2002. + * + * Sccsid @(#)regdfa.c 1.9 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "regdfa.h" + +/* +* Deterministic Finite Automata. +*/ + + /* + * Postorder traversal that returns a copy of the subtree, + * except that ROP_BKT becomes ROP_BKTCOPY (since they + * share the same pointed to Bracket object). 
+ */ +static Tree * +copy(regex_t *ep, Tree *tp) +{ + Tree *np; + + if ((np = malloc(sizeof(Tree))) == 0) + return 0; + switch (np->op = tp->op) /* almost always correct */ + { + case ROP_BKT: + np->op = ROP_BKTCOPY; + /*FALLTHROUGH*/ + case ROP_BKTCOPY: + np->right.info.bkt = tp->right.info.bkt; + /*FALLTHROUGH*/ + default: + np->left.pos = ep->re_dfa->nposn++; + /*FALLTHROUGH*/ + case ROP_EMPTY: + return np; + case ROP_CAT: + case ROP_OR: + if ((np->right.ptr = copy(ep, tp->right.ptr)) == 0) + { + free(np); + return 0; + } + np->right.ptr->parent = np; + /*FALLTHROUGH*/ + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + case ROP_LP: + if ((np->left.ptr = copy(ep, tp->left.ptr)) == 0) + break; + np->left.ptr->parent = np; + return np; + } + libuxre_regdeltree(np, 1); + return 0; +} + + /* + * Postorder traversal. + * Assign unique ascending integer values to the leaves. + * Since the right child is traversed before the left, + * the position for ROP_END is guaranteed to be zero. + * The parse tree is rewritten in two cases: + * - Each ROP_BRACE is replaced by an equivalent--sometimes + * large--subtree using only ROP_CAT, ROP_QUEST, and + * ROP_PLUS. + * - If REG_ICASE, replace each simple character that has + * an uppercase equivalent with a ROP_OR subtree over the + * two versions. + * Since these rewrites occur bottom up, they have already + * been applied before any subtrees passed to copy(). 
+ */ +static Tree * +findposn(regex_t *ep, Tree *tp, int mb_cur_max) +{ + unsigned int lo, hi; + Tree *ptr, *par; + w_type wc; + + switch (tp->op) + { + default: + if (ep->re_flags & REG_ICASE + && (wc = to_upper(tp->op)) != tp->op) + { + if ((ptr = libuxre_reg1tree(tp->op, 0)) == 0) + return 0; + ptr->parent = tp; + ptr->left.pos = ep->re_dfa->nposn++; + tp->op = ROP_OR; + tp->left.ptr = ptr; + ptr = libuxre_reg1tree(wc, 0); + if ((tp->right.ptr = ptr) == 0) + return 0; + ptr->parent = tp; + ptr->left.pos = ep->re_dfa->nposn++; + return tp; + } + /*FALLTHROUGH*/ + case ROP_BOL: + case ROP_EOL: + case ROP_ALL: + case ROP_ANYCH: + case ROP_NOTNL: + case ROP_NONE: + case ROP_BKT: + case ROP_BKTCOPY: + case ROP_END: + tp->left.pos = ep->re_dfa->nposn++; + return tp; + case ROP_EMPTY: + return tp; + case ROP_OR: + case ROP_CAT: + if ((tp->right.ptr = findposn(ep, tp->right.ptr, + mb_cur_max)) == 0) + return 0; + /*FALLTHROUGH*/ + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + case ROP_LP: + if ((tp->left.ptr = findposn(ep, tp->left.ptr, + mb_cur_max)) == 0) + return 0; + return tp; + case ROP_BRACE: + if ((tp->left.ptr = findposn(ep, tp->left.ptr, + mb_cur_max)) == 0) + return 0; + break; + } + /* + * ROP_BRACE as is cannot be handled in a DFA. This code + * duplicates the ROP_BRACE subtree as a left-towering + * series of ROP_CAT nodes, the first "lo" of which are + * direct copies of the original subtree. The tail of + * the series are either some number of ROP_QUESTs over + * copies of the original subtree, or a single ROP_PLUS + * over a copy (when "hi" is infinity). 
+ * + * All interesting cases {lo,hi}: + * {0,0} -> ROP_EMPTY, parsing, temporary + * {0,1} -> ROP_QUEST, parsing + * {0,2} -> CAT(QUEST(left), QUEST(copy)) + * {0,n} -> CAT({0,n-1}, QUEST(copy)) + * {0,} -> ROP_STAR, parsing + * + * {1,1} -> ROP_NOP, parsing, temporary + * {1,2} -> CAT(left, QUEST(copy)) + * {1,n} -> CAT({1,n-1}, QUEST(copy)) + * {1,} -> ROP_PLUS, parsing + * + * {2,2} -> CAT(left, copy) + * {2,n} -> CAT({2,n-1}, QUEST(copy)) + * {2,} -> CAT(left, PLUS(copy)) + * + * {3,3} -> CAT({2,2}, copy) + * {3,n} -> CAT({3,n-1}, QUEST(copy)) + * {3,} -> CAT({2,2}, PLUS(copy)) + * + * {n,} -> CAT({n-1,n-1}, PLUS(copy)) + * + * In all cases, the ROP_BRACE node is turned into the + * left-most ROP_CAT, and a copy of its original subtree + * is connected as the right child. Note that the bottom- + * up nature of this duplication guarantees that copy() + * never sees a ROP_BRACE node. + */ + par = tp->parent; + lo = tp->right.info.num[0]; + hi = tp->right.info.num[1]; + if ((ptr = copy(ep, tp->left.ptr)) == 0) + return 0; + ptr->parent = tp; + tp->op = ROP_CAT; + tp->right.ptr = ptr; + if (lo == 0) + { + if ((tp->left.ptr = libuxre_reg1tree(ROP_QUEST, tp->left.ptr)) + == 0) + return 0; + tp->left.ptr->parent = tp; + } + else + { + if (hi == BRACE_INF || (hi -= lo) == 0) + lo--; /* lo > 1; no extra needed */ + while (--lo != 0) + { + if ((tp = libuxre_reg2tree(ROP_CAT, tp, copy(ep, ptr))) + == 0) + return 0; + } + } + if (hi == BRACE_INF) + { + if ((tp->right.ptr = libuxre_reg1tree(ROP_PLUS, tp->right.ptr)) + == 0) + return 0; + tp->right.ptr->parent = tp; + } + else if (hi != 0) + { + if ((tp->right.ptr = libuxre_reg1tree(ROP_QUEST, tp->right.ptr)) + == 0) + return 0; + ptr = tp->right.ptr; + ptr->parent = tp; + while (--hi != 0) + { + if ((tp = libuxre_reg2tree(ROP_CAT, tp, copy(ep, ptr))) + == 0) + return 0; + } + } + tp->parent = par; + return tp; +} + + /* + * Postorder traversal, but not always entire subtree. 
+ * For each leaf reachable by the empty string, add it + * to the set. Return 0 if the subtree can match empty. + */ +static int +first(Dfa *dp, Tree *tp) +{ + switch (tp->op) + { + case ROP_BOL: + if (dp->flags & REG_NOTBOL) + return 0; + break; + case ROP_EOL: + if (dp->flags & REG_NOTEOL) + return 0; + break; + case ROP_EMPTY: + return 0; + case ROP_OR: + return first(dp, tp->left.ptr) & first(dp, tp->right.ptr); + case ROP_CAT: + if (first(dp, tp->left.ptr) != 0) + return 1; + return first(dp, tp->right.ptr); + case ROP_BRACE: + if (tp->right.info.num[0] != 0 && first(dp, tp->left.ptr) != 0) + return 1; + /*FALLTHROUGH*/ + case ROP_STAR: + case ROP_QUEST: + first(dp, tp->left.ptr); + return 0; + case ROP_LP: + case ROP_PLUS: + return first(dp, tp->left.ptr); + } + if (dp->posset[tp->left.pos] == 0) + { + dp->posset[tp->left.pos] = 1; + dp->nset++; + } + return 1; +} + + /* + * Walk from leaf up (most likely not to root). + * Determine follow set for the leaf by filling + * set[] with the positions reachable. + */ +static void +follow(Dfa *dp, Tree *tp) +{ + Tree *pp; + + switch ((pp = tp->parent)->op) + { + case ROP_CAT: + if (pp->left.ptr == tp && first(dp, pp->right.ptr) != 0) + break; + /*FALLTHROUGH*/ + case ROP_OR: + case ROP_QUEST: + case ROP_LP: + follow(dp, pp); + break; + case ROP_STAR: + case ROP_PLUS: + case ROP_BRACE: + first(dp, tp); + follow(dp, pp); + break; + } +} + + /* + * Postorder traversal. + * At each leaf, copy it into posn[] and assign its follow set. + * Because the left-most subtree is ROP_ALL under ROP_STAR, the + * follow set for its leaf (position dp->nposn-1) is the same + * as the initial state's signature (prior to any ROP_BOL). 
+ */ +static int +posnfoll(Dfa *dp, Tree *tp) +{ + unsigned char *s; + size_t i, n; + size_t *fp; + Posn *p; + int ret; + + switch (tp->op) + { + case ROP_OR: + case ROP_CAT: + if ((ret = posnfoll(dp, tp->right.ptr)) != 0) + return ret; + /*FALLTHROUGH*/ + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + case ROP_LP: + if ((ret = posnfoll(dp, tp->left.ptr)) != 0) + return ret; + return 0; + case ROP_END: /* keeps follow() from walking above the root */ + p = &dp->posn[tp->left.pos]; + p->op = tp->op; + p->seti = 0; + p->nset = 0; + return 0; + case ROP_BKT: + case ROP_BKTCOPY: + p = &dp->posn[tp->left.pos]; + p->bkt = tp->right.info.bkt; + goto skip; + case ROP_BOL: + dp->flags |= REG_NOTBOL; /* adjacent ROP_BOLs match empty */ + break; + case ROP_EOL: + dp->flags |= REG_NOTEOL; /* adjacent ROP_EOLs match empty */ + break; + } + p = &dp->posn[tp->left.pos]; +skip:; + p->op = tp->op; + memset(dp->posset, 0, dp->nposn); + dp->nset = 0; + follow(dp, tp); + dp->flags &= ~(REG_NOTBOL | REG_NOTEOL); + fp = dp->posfoll; + if ((p->nset = dp->nset) > dp->avail) /* need more */ + { + if ((n = p->nset << 1) < dp->nposn) + n = dp->nposn; + dp->avail += n; + if ((fp = realloc(dp->posfoll, + sizeof(size_t) * (dp->avail + dp->used))) == 0) + { + return REG_ESPACE; + } + dp->posfoll = fp; + } + p->seti = dp->used; + if ((i = dp->nset) != 0) + { + dp->used += i; + dp->avail -= i; + fp += p->seti; + s = dp->posset; + n = 0; + do + { + if (*s++ != 0) + { + *fp++ = n; + if (--i == 0) + break; + } + } while (++n != dp->nposn); + } + return 0; +} + +static int +addstate(Dfa *dp) /* install state if unique; return its index */ +{ + size_t *sp, *fp; + size_t t, n, i; + int flushed; + + /* + * Compare dp->nset/dp->cursig[] against remembered states. 
+ */ + t = dp->top; + do + { + if (dp->nsig[--t] != dp->nset) + continue; + if ((n = dp->nset) != 0) + { + fp = &dp->sigfoll[dp->sigi[t]]; + sp = &dp->cursig[0]; + loop:; + if (*fp++ != *sp++) + continue; /* to the do-while */ + if (--n != 0) + goto loop; + } + return t + 1; + } while (t != 0); + /* + * Not in currently cached states; add it. + */ + flushed = 0; + if ((t = dp->top) >= CACHESZ) /* need to flush the cache */ + { + flushed = 1; + n = dp->anybol; + n = dp->sigi[n] + dp->nsig[n]; /* past invariant states */ + dp->avail += dp->used - n; + dp->used = n; + dp->top = n = dp->nfix; + memset((void *)&dp->trans, 0, sizeof(dp->trans)); + memset((void *)&dp->acc[n], 0, CACHESZ - n); + t = n; + } + dp->top++; + fp = dp->sigfoll; + if ((n = dp->nset) > dp->avail) /* grow strip */ + { + i = dp->avail + n << 1; + if ((fp = realloc(fp, sizeof(size_t) * (i + dp->used))) == 0) + return 0; + dp->avail = i; + dp->sigfoll = fp; + } + dp->acc[t] = 0; + if ((dp->nsig[t] = n) != 0) + { + sp = dp->cursig; + if (sp[0] == 0) + dp->acc[t] = 1; + dp->sigi[t] = i = dp->used; + dp->used += n; + dp->avail -= n; + fp += i; + do + *fp++ = *sp++; + while (--n != 0); + } + t++; + if (flushed) + return -t; + return t; +} + +void +libuxre_regdeldfa(Dfa *dp) +{ + Posn *pp; + size_t np; + + if (dp->posfoll != 0) + free(dp->posfoll); + if (dp->sigfoll != 0) + free(dp->sigfoll); + if (dp->cursig != 0) + free(dp->cursig); + if ((pp = dp->posn) != 0) + { + /* + * Need to walk the positions list to free any + * space used for ROP_BKTs. 
+ */ + np = dp->nposn; + do + { + if (pp->op == ROP_BKT) + { + libuxre_bktfree(pp->bkt); + free(pp->bkt); + } + } while (++pp, --np != 0); + free(dp->posn); + } + free(dp); +} + +int +regtrans(Dfa *dp, int st, w_type wc, int mb_cur_max) +{ + const unsigned char *s; + size_t *fp, *sp; + size_t i, n; + Posn *pp; + int nst; + + if ((n = dp->nsig[st]) == 0) /* dead state */ + return st + 1; /* stay here */ + memset(dp->posset, 0, dp->nposn); + dp->nset = 0; + fp = &dp->sigfoll[dp->sigi[st]]; + do + { + pp = &dp->posn[*fp]; + switch (pp->op) + { + case ROP_EOL: + if (wc == '\0' && (dp->flags & REG_NOTEOL) == 0) + break; + /*FALLTHROUGH*/ + case ROP_BOL: + default: + if (pp->op == wc) + break; + /*FALLTHROUGH*/ + case ROP_END: + case ROP_NONE: + continue; + case ROP_NOTNL: + if (wc == '\n') + continue; + /*FALLTHROUGH*/ + case ROP_ANYCH: + if (wc <= '\0') + continue; + break; + case ROP_ALL: + if (wc == '\0') + continue; + break; + case ROP_BKT: + case ROP_BKTCOPY: + /* + * Note that multiple character bracket matches + * are precluded from DFAs. (See regparse.c and + * regcomp.c.) Thus, the continuation string + * argument is not used in libuxre_bktmbexec(). + */ + if (wc > '\0' && + libuxre_bktmbexec(pp->bkt, wc, 0, mb_cur_max) == 0) + break; + continue; + } + /* + * Current character matches this position. + * For each position in its follow list, + * add that position to the new state's signature. + */ + i = pp->nset; + sp = &dp->posfoll[pp->seti]; + do + { + if (dp->posset[*sp] == 0) + { + dp->posset[*sp] = 1; + dp->nset++; + } + } while (++sp, --i != 0); + } while (++fp, --n != 0); + /* + * Move the signature (if any) into cursig[] and install it. 
+ */ + if ((i = dp->nset) != 0) + { + fp = dp->cursig; + s = dp->posset; + for (n = 0;; n++) + { + if (*s++ != 0) + { + *fp++ = n; + if (--i == 0) + break; + } + } + } + if ((nst = addstate(dp)) < 0) /* flushed cache */ + nst = -nst; + else if (nst > 0 && (wc & ~(long)(NCHAR - 1)) == 0) + dp->trans[st][wc] = nst; + return nst; +} + +LIBUXRE_STATIC int +libuxre_regdfacomp(regex_t *ep, Tree *tp, Lex *lxp) +{ + Tree *lp; + Dfa *dp; + Posn *p; + int st; + + /* + * It's convenient to insert an STAR(ALL) subtree to the + * immediate left of the current tree. This makes the + * "any match" libuxre_regdfaexec() not a special case, + * and the initial state signature will fall out when + * building the follow sets for all the leaves. + */ + if ((lp = libuxre_reg1tree(ROP_ALL, 0)) == 0 + || (lp = libuxre_reg1tree(ROP_STAR, lp)) == 0 + || (tp->left.ptr = lp + = libuxre_reg2tree(ROP_CAT, lp, tp->left.ptr)) == 0) + { + return REG_ESPACE; + } + lp->parent = tp; + if ((dp = calloc(1, sizeof(Dfa))) == 0) + return REG_ESPACE; + ep->re_dfa = dp; + /* + * Just in case null pointers aren't just all bits zero... + */ + dp->posfoll = 0; + dp->sigfoll = 0; + dp->cursig = 0; + dp->posn = 0; + /* + * Assign position values to each of the tree's leaves + * (the important parts), meanwhile potentially rewriting + * the parse tree so that it fits within the restrictions + * of our DFA. + */ + if ((tp = findposn(ep, tp, lxp->mb_cur_max)) == 0) + goto err; + /* + * Get space for the array of positions and current set, + * now that the number of positions is known. + */ + if ((dp->posn = malloc(sizeof(Posn) * dp->nposn + dp->nposn)) == 0) + goto err; + dp->posset = (unsigned char *)&dp->posn[dp->nposn]; + /* + * Get follow sets for each position. + */ + if (posnfoll(dp, tp) != 0) + goto err; + /* + * Set up the special invariant states: + * - dead state (no valid transitions); index 0. + * - initial state for any match [STAR(ALL) follow set]; index 1. 
+ * - initial state for any match after ROP_BOL. + * - initial state for left-most longest if REG_NOTBOL. + * - initial state for left-most longest after ROP_BOL. + * The final two are not allocated if leftmost() cannot be called. + * The pairs of initial states are the same if there is no + * explicit ROP_BOL transition. + */ + dp->avail += dp->used; + dp->used = 0; + if ((dp->sigfoll = malloc(sizeof(size_t) * dp->avail)) == 0) + goto err; + p = &dp->posn[dp->nposn - 1]; /* same as first(root) */ + dp->cursig = &dp->posfoll[p->seti]; + dp->nset = p->nset; + dp->top = 1; /* index 0 is dead state */ + addstate(dp); /* must be state index 1 (returns 2) */ + if ((dp->cursig = malloc(sizeof(size_t) * dp->nposn)) == 0) + goto err; + dp->nfix = 2; + if ((st = regtrans(dp, 1, ROP_BOL, lxp->mb_cur_max)) == 0) + goto err; + if ((dp->anybol = st - 1) == 2) /* new state */ + dp->nfix = 3; + if ((ep->re_flags & REG_NOSUB) == 0) /* leftmost() might be called */ + { + /* + * leftmost() initial states are the same as the + * "any match" ones without the STAR(ALL) position. 
+ */ + dp->sigi[dp->nfix] = 0; + dp->nsig[dp->nfix] = dp->nsig[1] - 1; + dp->acc[dp->nfix] = dp->acc[1]; + dp->leftbol = dp->leftmost = dp->nfix; + dp->nfix++; + if (dp->anybol != 1) /* distinct state w/BOL */ + { + dp->sigi[dp->nfix] = dp->sigi[2]; + dp->nsig[dp->nfix] = dp->nsig[2] - 1; + dp->acc[dp->nfix] = dp->acc[2]; + dp->leftbol = dp->nfix; + dp->nfix++; + } + dp->top = dp->nfix; + } + return 0; +err:; + libuxre_regdeldfa(dp); + return REG_ESPACE; +} + +static int +leftmost(Dfa *dp, Exec *xp) +{ + const unsigned char *s, *beg, *end; + int i, nst, st, mb_cur_max; + w_type wc; + + mb_cur_max = xp->mb_cur_max; + beg = s = xp->str; + end = 0; + st = dp->leftbol; + if (xp->flags & REG_NOTBOL) + st = dp->leftmost; + if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0) + end = s; /* initial empty match allowed */ + for (;;) + { + if ((wc = *s++) == '\n') + { + if (xp->flags & REG_NEWLINE) + wc = ROP_EOL; + } + else if (!ISONEBYTE(wc) && (i = libuxre_mb2wc(&wc, s)) > 0) + s += i; + if ((wc & ~(long)(NCHAR - 1)) != 0 + || (nst = dp->trans[st][wc]) == 0) + { + if ((nst=regtrans(dp, st, wc, mb_cur_max)) == 0) + return REG_ESPACE; + if (wc == ROP_EOL) /* REG_NEWLINE only */ + { + if (dp->acc[nst - 1]) + { + if (end == 0 || end < s) + end = s; + break; + } + beg = s; + st = dp->leftbol; + goto newst; + } + } + if ((st = nst - 1) == 0) /* dead state */ + { + if (end != 0) + break; + if ((wc = *beg++) == '\0') + return REG_NOMATCH; + else if (!ISONEBYTE(wc) && + (i = libuxre_mb2wc(&wc, beg)) > 0) + beg += i; + s = beg; + st = dp->leftmost; + goto newst; + } + if (wc == '\0') + { + if (dp->acc[st]) + { + s--; /* don't include \0 */ + if (end == 0 || end < s) + end = s; + break; + } + if (end != 0) + break; + return REG_NOMATCH; + } + newst:; + if (dp->acc[st]) + { + if (end == 0 || end < s) + end = s; + } + } + xp->match[0].rm_so = beg - xp->str; + xp->match[0].rm_eo = end - xp->str; + return 0; +} + +/* +* Optimization by simplification: singlebyte locale and REG_NEWLINE 
not set. +* Performance gain for grep is 25% so it's worth the hack. +*/ +static int +regdfaexec_opt(Dfa *dp, Exec *xp) +{ + const unsigned char *s; + int nst, st; + + s = xp->str; + st = dp->anybol; + if (xp->flags & REG_NOTBOL) + st = 1; + if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0) + return 0; /* initial empty match allowed */ + do + { + if ((nst = dp->trans[st][*s]) == 0) + { + if ((nst = regtrans(dp, st, *s, 1)) == 0) + return REG_ESPACE; + } + if (dp->acc[st = nst - 1]) + return 0; + } while (*s++ != '\0'); /* st != 0 */ + return REG_NOMATCH; +} + +LIBUXRE_STATIC int +libuxre_regdfaexec(Dfa *dp, Exec *xp) +{ + const unsigned char *s; + int i, nst, st, mb_cur_max; + w_type wc; + + dp->flags = xp->flags & REG_NOTEOL; /* for regtrans() */ + mb_cur_max = xp->mb_cur_max; + if (xp->nmatch != 0) + return leftmost(dp, xp); + if (mb_cur_max == 1 && (xp->flags & REG_NEWLINE) == 0) + return regdfaexec_opt(dp, xp); + s = xp->str; + st = dp->anybol; + if (xp->flags & REG_NOTBOL) + st = 1; + if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0) + return 0; /* initial empty match allowed */ + for (;;) + { + if ((wc = *s++) == '\n') + { + if (xp->flags & REG_NEWLINE) + wc = ROP_EOL; + } + else if (!ISONEBYTE(wc) && (i = libuxre_mb2wc(&wc, s)) > 0) + s += i; + if ((wc & ~(long)(NCHAR - 1)) != 0 + || (nst = dp->trans[st][wc]) == 0) + { + if ((nst=regtrans(dp, st, wc, mb_cur_max)) == 0) + return REG_ESPACE; + if (wc == ROP_EOL) /* REG_NEWLINE only */ + { + if (dp->acc[nst - 1]) + return 0; + if (dp->acc[st = dp->anybol]) + return 0; + continue; + } + } + if (dp->acc[st = nst - 1]) + return 0; + if (wc == '\0') /* st == 0 */ + return REG_NOMATCH; + } +} diff --git a/libuxre/regdfa.h b/libuxre/regdfa.h @@ -0,0 +1,75 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. 
+ * + * Sccsid @(#)regdfa.h 1.3 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ + +/* +* Deterministic Finite Automata. +*/ + +#ifndef LIBUXRE_REGDFA_H +#define LIBUXRE_REGDFA_H + +#include <re.h> + +typedef struct +{ + Bracket *bkt; /* extra info for ROP_BKT */ + size_t nset; /* number of items in the follow set */ + size_t seti; /* index into the follow set strip */ + w_type op; /* the leaf match operation */ +} Posn; + +#define CACHESZ 32 /* max. 
states to remember (must fit in uchar) */ +#define NCHAR (1 << CHAR_BIT) + +struct re_dfa_ /*Dfa*/ +{ + unsigned char *posset; /* signatures built here */ + size_t *posfoll; /* follow strip for posn[] */ + size_t *sigfoll; /* follow strip for sigi[] */ + size_t *cursig; /* current state's signature */ + Posn *posn; /* important positions */ + size_t nposn; /* length of posn,cursig,posset */ + size_t used; /* used portion of follow strip */ + size_t avail; /* unused part of follow strip */ + size_t nset; /* # items nonzero in posset[] */ + size_t nsig[CACHESZ]; /* number of items in signature */ + size_t sigi[CACHESZ]; /* index into sigfoll[] */ + unsigned char acc[CACHESZ]; /* nonzero for accepting states */ + unsigned char leftmost; /* leftmost() start, not BOL */ + unsigned char leftbol; /* leftmost() start, w/BOL */ + unsigned char anybol; /* any match start, w/BOL */ + unsigned char nfix; /* number of invariant states */ + unsigned char top; /* next state index available */ + unsigned char flags; /* interesting flags */ + unsigned char trans[CACHESZ][NCHAR]; /* goto table */ +}; + +extern int regtrans(Dfa *, int, w_type, int); + +#endif /* !LIBUXRE_REGDFA_H */ diff --git a/libuxre/regerror.c b/libuxre/regerror.c @@ -0,0 +1,95 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regerror.c 1.4 (gritter) 3/29/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include <string.h> +#include "re.h" +/* include "_locale.h" */ + +/* #pragma weak regerror = _regerror */ + +size_t +regerror(int err, const regex_t *ep, char *str, size_t max) +{ + const struct + { + int index; + const char *str; + } unk = + { + 88, "unknown regular expression error" + }, msgs[] = + { + /*ENOSYS*/ { 89, "feature not implemented" }, + /*0*/ { 0, "" }, + /*NOMATCH*/ { 90, "regular expression failed to match" }, + /*BADPAT*/ { 91, "invalid regular expression" }, + /*ECOLLATE*/ { 92, "invalid collating element construct" }, + /*ECTYPE*/ { 93, "invalid character class construct" }, + /*EEQUIV*/ { 94, "invalid equivalence class construct" }, + /*EBKTCHAR*/ { 95, "invalid character in '[ ]' construct" }, + /*EESCAPE*/ { 96, "trailing \\ in pattern" }, + /*ESUBREG*/ { 97, "'\\digit' out of range" }, + /*EBRACK*/ { 98, "'[ ]' imbalance" }, + /*EMPTYSUBBKT*/ { 99, "empty nested '[ ]' construct" }, + /*EMPTYPAREN*/ { 100, "empty '\\( \\)' or '( )'" }, + /*NOPAT*/ { 101, "empty pattern" }, + /*EPAREN*/ { 102, "'\\( \\)' or '( )' imbalance" }, + /*EBRACE*/ { 103, "'\\{ \\} or '{ }' imbalance" }, + /*BADBR*/ { 104, "invalid '\\{ \\}' or '{ }'" }, + /*ERANGE*/ { 105, "invalid endpoint in range" }, + /*ESPACE*/ { 106, "out of regular expression memory" }, + /*BADRPT*/ { 107, "invalid *, +, ?, \\{\\} or {} operator" }, + /*BADESC*/ { 108, "invalid escape sequence (e.g. 
\\0)" }, + /*ILLSEQ*/ { 109, "illegal byte sequence"} + }; + const char *p; + size_t len; + int i; + + if (err < REG_ENOSYS || REG_ILLSEQ < err) + { + i = unk.index; + p = unk.str; + } + else + { + i = msgs[err - REG_ENOSYS].index; + p = msgs[err - REG_ENOSYS].str; + } +/* p = __gtxt(_str_uxlibc, i, p); */ + len = strlen(p) + 1; + if (max != 0) + { + if (max > len) + max = len; + else if (max < len) + str[--max] = '\0'; + memcpy(str, p, max); + } + return len; +} diff --git a/libuxre/regex.h b/libuxre/regex.h @@ -0,0 +1,153 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regex.h 1.13 (gritter) 2/6/05 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef LIBUXRE_REGEX_H +#define LIBUXRE_REGEX_H +/* from unixsrc:usr/src/common/head/regex.h /main/uw7_nj/1 */ + +#include <sys/types.h> /* really only want [s]size_t */ + + /* + * Official regexec() flags. + */ +#define REG_NOTBOL 0x000001 /* start of string does not match ^ */ +#define REG_NOTEOL 0x000002 /* end of string does not match $ */ + + /* + * Additional regexec() flags. 
+ */ +#define REG_NONEMPTY 0x000004 /* do not match empty at start of string */ + + /* + * Extensions to provide individual control over each + * of the differences between basic and extended REs. + */ +#define REG_OR 0x0000001 /* enable | operator */ +#define REG_PLUS 0x0000002 /* enable + operator */ +#define REG_QUEST 0x0000004 /* enable ? operator */ +#define REG_BRACES 0x0000008 /* use {m,n} (instead of \{m,n\}) */ +#define REG_PARENS 0x0000010 /* use (...) [instead of \(...\)] */ +#define REG_ANCHORS 0x0000020 /* ^ and $ are anchors anywhere */ +#define REG_NOBACKREF 0x0000040 /* disable \digit */ +#define REG_NOAUTOQUOTE 0x0000080 /* no automatic quoting of REG_BADRPTs */ + + /* + * Official regcomp() flags. + */ +#define REG_EXTENDED (REG_OR | REG_PLUS | REG_QUEST | REG_BRACES | \ + REG_PARENS | REG_ANCHORS | \ + REG_NOBACKREF | REG_NOAUTOQUOTE) +#define REG_ICASE 0x0000100 /* ignore case */ +#define REG_NOSUB 0x0000200 /* only success/fail for regexec() */ +#define REG_NEWLINE 0x0000400 /* take \n as line separator for ^ and $ */ + + /* + * Additional regcomp() flags. + * Some of these assume that int is >16 bits! + * Beware: 0x20000000 and above are used in re.h. 
+ */ +#define REG_ONESUB 0x0000800 /* regexec() only needs pmatch[0] */ +#define REG_MTPARENFAIL 0x0001000 /* take empty \(\) or () as match failure */ +#define REG_MTPARENBAD 0x0002000 /* disallow empty \(\) or () */ +#define REG_BADRANGE 0x0004000 /* accept [m-a] ranges as [ma] */ +#define REG_ODDRANGE 0x0008000 /* oawk oddity: [m-a] means [m] */ +#define REG_SEPRANGE 0x0010000 /* disallow [a-m-z] style ranges */ +#define REG_BKTQUOTE 0x0020000 /* allow \ in []s to quote \, -, ^ or ] */ +#define REG_BKTEMPTY 0x0040000 /* allow empty []s (w/BKTQUOTE, BKTESCAPE) */ +#define REG_ANGLES 0x0080000 /* enable \<, \> operators */ +#define REG_ESCNL 0x0100000 /* take \n as newline character */ +#define REG_NLALT 0x0200000 /* take newline as alternation */ +#define REG_ESCSEQ 0x0400000 /* otherwise, take \ as start of C escapes */ +#define REG_BKTESCAPE 0x0800000 /* allow \ in []s to quote next anything */ +#define REG_NOBRACES 0x1000000 /* disable {n,m} */ +#define REG_ADDITIVE 0x2000000 /* a+*b means + and * additive, ^+ is valid */ +#define REG_NOI18N 0x4000000 /* disable I18N features ([::] etc.) */ +#define REG_OLDESC 0x8000000 /* recognize \b \f \n \r \t \123 only */ +#define REG_AVOIDNULL 0x10000000/* avoid null subexpression matches */ +#define REG_OLDBRE (REG_BADRANGE | REG_ANGLES | REG_ESCNL) +#define REG_OLDERE (REG_OR | REG_PLUS | REG_QUEST | REG_NOBRACES | \ + REG_PARENS | REG_ANCHORS | REG_ODDRANGE | \ + REG_NOBACKREF | REG_ADDITIVE | REG_NOAUTOQUOTE) + + /* + * Error return values. 
+ */ +#define REG_ENOSYS (-1) /* unsupported */ +#define REG_NOMATCH 1 /* regexec() failed to match */ +#define REG_BADPAT 2 /* invalid regular expression */ +#define REG_ECOLLATE 3 /* invalid collating element construct */ +#define REG_ECTYPE 4 /* invalid character class construct */ +#define REG_EEQUIV 5 /* invalid equivalence class construct */ +#define REG_EBKTCHAR 6 /* invalid character in [] construct */ +#define REG_EESCAPE 7 /* trailing \ in pattern */ +#define REG_ESUBREG 8 /* number in \digit invalid or in error */ +#define REG_EBRACK 9 /* [] imbalance */ +#define REG_EMPTYSUBBKT 10 /* empty sub-bracket construct */ +#define REG_EMPTYPAREN 11 /* empty \(\) or () [REG_MTPARENBAD] */ +#define REG_NOPAT 12 /* no (empty) pattern */ +#define REG_EPAREN 13 /* \(\) or () imbalance */ +#define REG_EBRACE 14 /* \{\} or {} imbalance */ +#define REG_BADBR 15 /* contents of \{\} or {} invalid */ +#define REG_ERANGE 16 /* invalid endpoint in expression */ +#define REG_ESPACE 17 /* out of memory */ +#define REG_BADRPT 18 /* *,+,?,\{\} or {} not after r.e. */ +#define REG_BADESC 19 /* invalid escape sequence (e.g. \0) */ +#define REG_ILLSEQ 20 /* illegal byte sequence */ + +typedef struct +{ + size_t re_nsub; /* only advertised member */ + unsigned long re_flags; /* augmented regcomp() flags */ + struct re_dfa_ *re_dfa; /* DFA engine */ + struct re_nfa_ *re_nfa; /* NFA engine */ + struct re_coll_ *re_col; /* current collation info */ + int re_mb_cur_max; /* MB_CUR_MAX acceleration */ + void *re_more; /* just in case... 
*/ +} regex_t; + +typedef ssize_t regoff_t; + +typedef struct +{ + regoff_t rm_so; + regoff_t rm_eo; +} regmatch_t; + +#ifdef __cplusplus +extern "C" { +#endif + +int regcomp(regex_t *, const char *, int); +int regexec(const regex_t *, const char *, size_t, regmatch_t *, int); +size_t regerror(int, const regex_t *, char *, size_t); +void regfree(regex_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* !LIBUXRE_REGEX_H */ diff --git a/libuxre/regexec.c b/libuxre/regexec.c @@ -0,0 +1,68 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regexec.c 1.7 (gritter) 2/6/05 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. 
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include "re.h" + +/* #pragma weak regexec = _regexec */ + +int +regexec(const regex_t *ep, const char *s, size_t n, regmatch_t *mp, int flg) +{ + Exec ex; + int ret; + + ex.flags = flg | (ep->re_flags & (REG_NEWLINE|REG_ICASE|REG_AVOIDNULL)); + ex.str = (const unsigned char *)s; + ex.match = mp; + ex.mb_cur_max = ep->re_mb_cur_max; + if ((ex.nmatch = n) != 0) /* impose limits from compile flags */ + { + if (ep->re_flags & REG_NOSUB) + n = ex.nmatch = 0; + else if (ep->re_flags & REG_ONESUB) + ex.nmatch = 1; + else if (n > ep->re_nsub + 1) + ex.nmatch = ep->re_nsub + 1; + } + if (ep->re_flags & REG_DFA && ex.nmatch <= 1) + ret = libuxre_regdfaexec(ep->re_dfa, &ex); + else + ret = libuxre_regnfaexec(ep->re_nfa, &ex); + /* + * Fill unused part of mp[]. + */ + if (ret != 0) + ex.nmatch = 0; + while (n > ex.nmatch) + { + n--; + mp[n].rm_so = -1; + mp[n].rm_eo = -1; + } + return ret; +} diff --git a/libuxre/regfree.c b/libuxre/regfree.c @@ -0,0 +1,42 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regfree.c 1.3 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include "re.h" + +/* #pragma weak regfree = _regfree */ + +void +regfree(regex_t *ep) +{ + if (ep->re_flags & REG_DFA) + libuxre_regdeldfa(ep->re_dfa); + if (ep->re_flags & REG_NFA) + libuxre_regdelnfa(ep->re_nfa); + if (ep->re_col != 0) + (void)libuxre_lc_collate(ep->re_col); +} diff --git a/libuxre/regnfa.c b/libuxre/regnfa.c @@ -0,0 +1,1070 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regnfa.c 1.8 (gritter) 2/6/05 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include <string.h> +#include <stdlib.h> +#include "re.h" +#include <stddef.h> +#include <ctype.h> + +typedef unsigned char Uchar; +typedef unsigned short Ushort; + +/* +* Nondeterministic Finite Automata. 
+*/ +typedef struct t_graph Graph; +struct t_graph +{ + union + { + Graph *ptr; + Info info; + } alt; + Graph *next; + w_type op; +}; + +typedef struct t_stack Stack; +struct t_stack +{ + Stack *link; /* simplifies cleanup */ + Stack *prev; /* covered states */ + Graph *wasgp; /* node associated with this state */ + const Uchar *str; /* saved position in the string */ + Ushort cnt; /* ROP_BRACE: traversal count */ +}; + + /* + * A Context holds all the information needed for each + * potential path through the NFA graph. + */ +typedef struct t_ctxt Context; +struct t_ctxt +{ + Context *link; /* simplifies cleanup */ + Context *next; /* singly linked */ + Stack *sp; /* nested counts */ + Graph *gp; /* starting node */ + Graph *wasgp; /* node associated with this state */ + const Uchar *str; /* saved position in the string */ + Ushort cnt; /* ROP_BRACE: traversal count */ + size_t nset; /* length of rm[] that is currently set */ + regmatch_t rm[1]; /* enough to cover re_nsub+1 (np->rmlen) */ +}; + +struct re_nfa_ /*Nfa*/ +{ + Graph *gp; /* entire NFA */ + Stack *sp; /* unused Stacks */ + Stack *allsp; /* linked Stacks (for cleanup) */ + Context *allcp; /* linked Contexts (for cleanup) */ + Context *cur; /* Contexts to be continued now */ + Context *step; /* Contexts waiting for a step of the NFA */ + Context *avail; /* unused Contexts */ + Context **ecur; /* ends cur list of Contexts */ + Context **estp; /* ends step list of Contexts */ + size_t rmlen; /* length of rm[] in each Context */ + size_t rmmin; /* minimum length needed */ + size_t used; /* length used for this libuxre_regnfaexec() */ + w_type beg; /* nonzero for fixed char initial node NFAs */ +}; + +#define ROP_MTOR ROP_CAT /* ROP_OR, except might be empty loop */ + + /* + * Depth first traversal. + * Make a singly linked list (in alt.ptr) of the graph's nodes. + * Must toss any ROP_BKTs, too, since "alt" is overwritten. 
+	*/
+static void
+deltolist(Graph *gp, Graph **list)
+{
+	Graph *ptr;
+	/*
+	* gp->next doubles as the visited mark: it is zeroed before
+	* recursing, so a node that is reached again through a loop
+	* or a join takes one of the else branches below and is not
+	* threaded onto the list a second time.
+	*/
+	if ((ptr = gp->next) != 0) /* first time */
+	{
+		gp->next = 0;	/* mark before recursing */
+		if (gp->op == ROP_OR || gp->op == ROP_MTOR)	/* alt.ptr is a second successor */
+			deltolist(gp->alt.ptr, list);
+		deltolist(ptr, list);
+		if (gp->op == ROP_BKT)	/* release bracket data before alt is overwritten */
+		{
+			libuxre_bktfree(gp->alt.info.bkt);
+			free(gp->alt.info.bkt);
+		}
+	}
+	else if (gp->op == ROP_END)	/* next is 0 here, yet the node was not listed */
+		gp->op = ROP_NOP;	/* retag so a later arrival just returns */
+	else
+		return;	/* already visited */
+	gp->alt.ptr = *list;	/* thread this node onto the front of *list */
+	*list = gp;
+}
+
+	/*
+	* After the list is turned into a linked list,
+	* walk that list freeing the nodes.
+	* The address of the local "end" terminates the list,
+	* so the walk stops there instead of at a null pointer.
+	*/
+static void
+delgraph(Graph *gp)
+{
+	Graph *gp2, end;
+
+	gp2 = &end;
+	deltolist(gp, &gp2);
+	while ((gp = gp2) != &end)
+	{
+		gp2 = gp->alt.ptr;	/* fetch the link before the node goes away */
+		free(gp);
+	}
+}
+
+	/*
+	* Depth first traversal.
+	* Look for ROP_NOPs and prune them from the graph.
+	* Chain them all together on *nop's list.
+	* Returns the node a predecessor should point at in place
+	* of gp: gp itself, or, for a ROP_NOP, what follows it.
+	* A node whose next is 0 (nothing follows it, or it is
+	* currently being walked) is returned unchanged.
+	*/
+static Graph *
+nopskip(Graph *gp, Graph **nop)
+{
+	Graph *ptr;
+
+	if ((ptr = gp->next) != 0) /* might have yet to do this subgraph */
+	{
+		if (gp->op == ROP_NOP)
+		{
+			if (gp->alt.ptr != 0) /* touched */
+				return gp->next; /* already did it */
+			gp->alt.ptr = *nop;	/* chain it; this also marks it touched when *nop is non-null */
+			*nop = gp;
+		}
+		gp->next = 0; /* this subgraph's pending */
+		if (gp->op == ROP_OR || gp->op == ROP_MTOR)
+			gp->alt.ptr = nopskip(gp->alt.ptr, nop);
+		gp->next = nopskip(ptr, nop);
+		if (gp->op == ROP_NOP)
+			return gp->next;	/* callers link past the pruned node */
+	}
+	return gp;
+}
+
+	/*
+	* Postorder traversal of the parse tree.
+	* Build a graph using "Thompson's" algorithm.
+	* The only significant modification is the
+	* ROP_BRACE->ROP_MTOR construction.
+ * Returns 1 => graph might match empty + * 0 => graph cannot match empty + * -1 => error (in allocation) + */ +static int +mkgraph(Tree *tp, Graph **first, Graph **last) +{ + Graph *new = 0, *nop, *lf, *ll, *rf, *rl; + int lmt, rmt = 0; + + if (tp->op != ROP_CAT) + { + if ((new = malloc(sizeof(Graph))) == 0) + return 0; + new->op = tp->op; /* usually */ + } + switch (tp->op) + { + case ROP_REF: + new->alt.info.sub = tp->right.info.sub; + *first = new; + *last = new; + return 1; /* safe--can't really tell */ + case ROP_BKT: + tp->op = ROP_BKTCOPY; /* now graph owns clean up */ + /*FALLTHROUGH*/ + case ROP_BKTCOPY: + new->alt.info.bkt = tp->right.info.bkt; + /*FALLTHROUGH*/ + default: + *first = new; + *last = new; + return 0; + case ROP_EMPTY: + new->op = ROP_NOP; + new->alt.ptr = 0; /* untouched */ + *first = new; + *last = new; + return 1; + case ROP_OR: + case ROP_CAT: + lf = 0; /* in case of error */ + if ((rmt = mkgraph(tp->right.ptr, &rf, &rl)) < 0) + goto err; + /*FALLTHROUGH*/ + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + case ROP_BRACE: + case ROP_LP: + if ((lmt = mkgraph(tp->left.ptr, &lf, &ll)) < 0) + goto err; + break; + } + /* + * Note that ROP_NOP only serves as the node that reconnects + * the two choices of an incoming ROP_OR or ROP_QUEST. To + * prevent rewalking portions of the graph in nopskip(), + * this code marks all ROP_NOP nodes as currently untouched. 
+ */ + switch (tp->op) + { + case ROP_OR: + if ((nop = malloc(sizeof(Graph))) == 0) + goto err; + nop->op = ROP_NOP; + nop->alt.ptr = 0; /* untouched */ + ll->next = nop; + rl->next = nop; + new->next = lf; + new->alt.ptr = rf; + *first = new; + *last = nop; + return lmt | rmt; + case ROP_CAT: /* no "new" */ + ll->next = rf; + *first = lf; + *last = rl; + return lmt & rmt; + case ROP_QUEST: + if ((nop = malloc(sizeof(Graph))) == 0) + goto err; + nop->op = ROP_NOP; + nop->alt.ptr = 0; /* untouched */ + new->op = ROP_OR; + new->next = lf; + new->alt.ptr = nop; + ll->next = nop; + *first = new; + *last = nop; + return 1; + case ROP_STAR: + *first = new; + rmt = 1; + star:; + new->op = lmt ? ROP_MTOR : ROP_OR; + new->alt.ptr = lf; + ll->next = new; + *last = new; + return rmt; + case ROP_PLUS: + *first = lf; + rmt = lmt; + goto star; + case ROP_BRACE: + if ((nop = malloc(sizeof(Graph))) == 0) + goto err; + nop->op = ROP_MTOR; /* going to save state anyway... */ + nop->alt.ptr = lf; + ll->next = new; + new->next = nop; + new->alt.info.num[1] = tp->right.info.num[1]; + if ((new->alt.info.num[0] = tp->right.info.num[0]) == 0) + { + lmt = 1; + *first = new; + } + else + { + new->alt.info.num[0]--; /* already done 1 */ + if (new->alt.info.num[1] != BRACE_INF) + new->alt.info.num[1]--; /* likewise */ + *first = lf; + } + *last = nop; + return lmt; + case ROP_LP: + if ((nop = malloc(sizeof(Graph))) == 0) + goto err; + nop->op = ROP_RP; + nop->alt.info.sub = tp->right.info.sub; + new->alt.info.sub = tp->right.info.sub; + new->next = lf; + ll->next = nop; + *first = new; + *last = nop; + return lmt; + } +err:; + if (KIND_ROP(tp->op) == BINARY_ROP && rf != 0) + delgraph(rf); + if (lf != 0) + delgraph(lf); + if (tp->op != ROP_CAT) + free(new); + return -1; +} + + /* + * Semi-preorder traversal. + * Return zero if there's no simple first character + * (including the operation ROP_BOL) that must always + * be at the start of a matching string. 
+ * This code doesn't attempt to get an answer if the
+ * first of the tree many be empty.
+ */
+static w_type
+firstop(Tree *tp)
+{
+	w_type op;
+
+	switch (tp->op)
+	{
+	case ROP_OR:	/* both alternatives must agree on a nonzero first op */
+		if ((op = firstop(tp->left.ptr)) == 0
+			|| op != firstop(tp->right.ptr))
+		{
+			return 0;
+		}
+		return op;
+	case ROP_BRACE:
+		if (tp->right.info.num[0] == 0)	/* minimum count of 0: operand may be skipped */
+			return 0;
+		/*FALLTHROUGH*/
+	case ROP_CAT:
+	case ROP_PLUS:
+	case ROP_LP:
+		return firstop(tp->left.ptr);	/* the left operand comes first */
+	default:
+		if (tp->op < 0)	/* NOTE(review): negative values look like operator codes, not characters -- confirm in re.h */
+			return 0;
+		/*FALLTHROUGH*/
+	case ROP_BOL:
+		return tp->op;
+	}
+}
+	/*
+	* Free an Nfa: its graph, every Context and Stack that was
+	* ever allocated for it (tracked through their link members
+	* on np->allcp and np->allsp), and the Nfa itself.
+	*/
+void
+libuxre_regdelnfa(Nfa *np)
+{
+	Context *cp, *cpn;
+	Stack *sp, *spn;
+
+	if (np->gp != 0)
+		delgraph(np->gp);
+	for (cp = np->allcp; cp != 0; cp = cpn)
+	{
+		cpn = cp->link;	/* read the link before freeing the node */
+		free(cp);
+	}
+	for (sp = np->allsp; sp != 0; sp = spn)
+	{
+		spn = sp->link;
+		free(sp);
+	}
+	free(np);
+}
+	/*
+	* Build the NFA for parse tree tp and store it in ep->re_nfa.
+	* Returns 0 on success, REG_ESPACE if an allocation fails.
+	* The cur/step list members of the Nfa are not set here;
+	* libuxre_regnfaexec() initializes them on each call.
+	*/
+LIBUXRE_STATIC int
+libuxre_regnfacomp(regex_t *ep, Tree *tp, Lex *lxp)
+{
+	Graph *gp, end;
+	Nfa *np;
+
+	if ((np = malloc(sizeof(Nfa))) == 0)
+		goto err;
+	np->gp = 0; /* in case of error */
+	if (mkgraph(tp, &np->gp, &gp) < 0)
+		goto err;
+	gp->next = 0; /* nothing follows ROP_END */
+	np->rmlen = 0;
+	if ((ep->re_flags & REG_NOSUB) == 0)
+		np->rmlen = ep->re_nsub + 1;	/* re_nsub slots plus one more (presumably slot 0, the whole match) */
+	np->rmmin = 0;
+	if (lxp->maxref != 0 && (np->rmmin = lxp->maxref + 1) > np->rmlen)	/* presumably maxref is the highest \digit used -- confirm in the lexer */
+		np->rmlen = np->rmmin;
+	/*
+	* Delete all ROP_NOPs from the graph.
+	* nopskip() disconnects them from the graph and
+	* links them together through their alt.ptr's.
+	* The list is seeded with the address of the local "end",
+	* which also terminates the freeing loop below.
+	*/
+	gp = &end;
+	np->gp = nopskip(np->gp, &gp);
+	while (gp != &end)
+	{
+		Graph *gp2 = gp;
+
+		gp = gp->alt.ptr;
+		free(gp2);
+	}
+	np->sp = 0;
+	np->allsp = 0;
+	np->avail = 0;
+	np->allcp = 0;
+	ep->re_nfa = np;
+	np->beg = firstop(tp);
+	return 0;
+err:;
+	if (np != 0)	/* np is 0 when the Nfa itself could not be allocated */
+	{
+		if (np->gp != 0)
+			delgraph(np->gp);
+		free(np);
+	}
+	return REG_ESPACE;
+}
+	/*
+	* Take a Stack from the free list np->sp (linked through prev).
+	* When that list is empty, first try to allocate up to four
+	* more, recording each on np->allsp for later cleanup.
+	* Returns 0 only if not even one Stack could be allocated.
+	*/
+static Stack *
+newstck(Nfa *np)
+{
+	Stack *sp, **spp;
+	int i;
+
+	if ((sp = np->sp) == 0) /* get more */
+	{
+		spp = &np->sp;
+		i = 4;
+		while ((sp = malloc(sizeof(Stack))) != 0)
+		{
+			sp->link = np->allsp;
+			np->allsp = sp;
+			*spp = sp;	/* append to the free list */
+			spp = &sp->prev;
+			if (--i == 0)
+				break;
+		}
+		*spp = 0;	/* terminate the free list */
+		if ((sp = np->sp) == 0) /* first malloc failed */
+			return 0;
+	}
+	np->sp = sp->prev;	/* unlink the head */
+	return sp;
+}
+	/*
+	* Maintain the saved (wasgp, str, cnt) states of a Context.
+	* gp != 0: enter node gp.  If the Context already has a
+	*	current state (wasgp != 0) it is pushed onto cp->sp;
+	*	then wasgp becomes gp and str and cnt are cleared.
+	* gp == 0: load the top entry of cp->sp into the Context's
+	*	current state and leave cp->sp at the entry below it.
+	*	If nothing is below, the old top node goes back on the
+	*	free list; otherwise the entries below are copied into
+	*	a chain that starts in the old top node.
+	* Returns 0, or REG_ESPACE if no Stack can be obtained.
+	* NOTE(review): the only gp == 0 call visible here is in
+	* newctxt(), which passes the parent Context's stack top in
+	* cp->sp, so the node recycled here appears to be one the
+	* parent still points at -- confirm this is intended.
+	*/
+static int
+mkstck(Nfa *np, Context *cp, Graph *gp)
+{
+	Stack *new, *sp;
+
+	if (gp == 0) /* copy existing stack tail */
+	{
+		/*
+		* Hoist up top of stack.
+		*/
+		new = cp->sp;
+		cp->wasgp = new->wasgp;
+		cp->str = new->str;
+		cp->cnt = new->cnt;
+		cp->sp = new->prev;
+		if ((sp = new->prev) == 0) /* only one below */
+		{
+			new->prev = np->sp;	/* return the node to the free list */
+			np->sp = new;
+			cp->sp = 0;
+			return 0;
+		}
+		for (;;) /* copy the rest; reusing the old top */
+		{
+			new->wasgp = sp->wasgp;
+			new->str = sp->str;
+			new->cnt = sp->cnt;
+			if ((new->prev = sp->prev) == 0)	/* copied the last entry */
+				break;
+			if ((new->prev = newstck(np)) == 0)
+				return REG_ESPACE;
+			new = new->prev;
+			sp = sp->prev;
+		}
+		return 0;
+	}
+	if (cp->wasgp != 0) /* push current down */
+	{
+		if ((new = newstck(np)) == 0)
+			return REG_ESPACE;
+		new->prev = cp->sp;
+		cp->sp = new;
+		new->wasgp = cp->wasgp;
+		new->str = cp->str;
+		new->cnt = cp->cnt;
+	}
+	cp->wasgp = gp;
+	cp->str = 0;
+	cp->cnt = 0;
+	return 0;
+}
+
+	/*
+	* Allocate a new Context (from np->avail)
+	* and add it to the end of the current list.
+	*/
+static int
+newctxt(Nfa *np, Context *cp, Graph *gp)
+{
+	Context *fresh = np->avail;
+
+	if (fresh == 0)
+	{
+		/*
+		* The free list is empty: try to add up to four
+		* Contexts.  Each one is allocated separately because
+		* its size depends on the length of rm[], which makes
+		* laying several out in one allocation awkward.
+		*/
+		size_t bytes = offsetof(Context, rm)
+			+ np->rmlen * sizeof(regmatch_t);
+		Context **tailp = &np->avail;
+		int batch;
+
+		for (batch = 0; batch < 4; batch++)
+		{
+			Context *extra = malloc(bytes);
+
+			if (extra == 0)
+				break;
+			extra->link = np->allcp;	/* remembered for cleanup */
+			np->allcp = extra;
+			*tailp = extra;
+			tailp = &extra->next;
+		}
+		*tailp = 0;
+		fresh = np->avail;
+		if (fresh == 0)
+			return REG_ESPACE;	/* not even one could be had */
+	}
+	/*
+	* Unlink it from the free list and start from a clean state.
+	*/
+	np->avail = fresh->next;
+	fresh->next = 0;
+	fresh->gp = gp;
+	fresh->sp = 0;
+	fresh->wasgp = 0;
+	fresh->nset = 0;
+	if (cp != 0)
+	{
+		/*
+		* Inherit the parent's saved states ...
+		*/
+		if (cp->sp != 0)
+		{
+			fresh->sp = cp->sp;
+			if (mkstck(np, fresh, 0) != 0)
+				return REG_ESPACE;
+		}
+		fresh->wasgp = cp->wasgp;
+		fresh->str = cp->str;
+		fresh->cnt = cp->cnt;
+		/*
+		* ... and whatever subexpression offsets it has
+		* recorded so far, if any are being tracked.
+		*/
+		if (np->used != 0 && cp->nset != 0)
+		{
+			size_t k;
+
+			fresh->nset = cp->nset;
+			for (k = 0; k < cp->nset; k++)
+			{
+				fresh->rm[k].rm_so = cp->rm[k].rm_so;
+				fresh->rm[k].rm_eo = cp->rm[k].rm_eo;
+			}
+		}
+	}
+	/*
+	* Put it at the tail of the current Context list.
+	*/
+	*np->ecur = fresh;
+	np->ecur = &fresh->next;
+	return 0;
+}
+
+	/*
+	* Compare two byte string sequences for equality.
+	* If REG_ICASE, walk through the strings doing
+	* caseless comparisons of the wide characters.
+ */ +static int +casecmp(const Uchar *s, Exec *xp, ssize_t i, ssize_t n, int mb_cur_max) +{ + const Uchar *p = &xp->str[i]; + const Uchar *end; + w_type wc1, wc2; + int k; + + if (strncmp((char *)s, (char *)p, n) == 0) /* try for exact match */ + return 1; + if ((xp->flags & REG_ICASE) == 0) + return 0; + /* + * Walk through each testing for a match, ignoring case, + * of the resulting wide characters. + * Note that only "s" can run out of characters. + */ + end = &p[n]; + do + { + if ((wc1 = *s++) == '\0') + return 0; + if (!ISONEBYTE(wc1) && (k = libuxre_mb2wc(&wc1, s)) > 0) + s += k; + if (!ISONEBYTE(wc2 = *p++) && (k = libuxre_mb2wc(&wc2, p)) > 0) + p += k; + if (wc1 != wc2) + { + wc1 = to_lower(wc1); + wc2 = to_lower(wc2); + if (wc1 != wc2) + return 0; + } + } while (p < end); + return 1; +} + +LIBUXRE_STATIC int +libuxre_regnfaexec(Nfa *np, Exec *xp) +{ + const Uchar *s, *s1, *s2; + Context *cp, *cpn; + Graph *gp, *brace; + Stack *sp, *spn; + ssize_t rmso, len; + int i, ret, mb_cur_max; + w_type wc; + size_t n; + + ret = 0; /* assume it matches */ + rmso = -1; /* but no match yet */ + np->cur = 0; + np->step = 0; + np->ecur = &np->cur; + np->estp = &np->step; + if ((np->used = xp->nmatch) < np->rmmin) + np->used = np->rmmin; + s1 = 0; /* one char back */ + s = xp->str; /* current high water in string */ + mb_cur_max = xp->mb_cur_max; + for (;;) + { + /* + * Get next character from string. + * If the engine proper hasn't started and the engine + * requires a particular character to start and this + * character isn't it, try the next one. 
+ */ + for (;;) + { + s2 = s1; + s1 = s; + if (!ISONEBYTE(wc = *s++) && + (i = libuxre_mb2wc(&wc, s)) > 0) + s += i; + if (np->cur != 0 || np->beg == wc || np->beg == 0) + break; + if (np->beg == ROP_BOL) + { + if (s2 == 0 && (xp->flags & REG_NOTBOL) == 0) + break; + if ((xp->flags & REG_NEWLINE) == 0) + goto nomatch; + if (s2 != 0 && *s2 == '\n') + break; + } + if (wc == '\0') + goto nomatch; + } + /* + * Start the engine by inserting a fresh initial context + * if there's no known match as yet. (Once some match + * has been found, the end is near.) + */ + if (rmso < 0 && newctxt(np, 0, np->gp) != 0) + goto err; + /* + * Walk the current Contexts list, trying each. + * "loop" is when a new Context is to be tried, + * "again" is when the same Context continues, + * but wc was not yet matched. + */ + cp = np->cur; + loop:; + gp = cp->gp; + again:; + switch (gp->op) + { + case ROP_BRACE: /* gp->next->op == ROP_MTOR */ + brace = gp; + gp = gp->next; + goto mtor; + case ROP_MTOR: + brace = 0; + mtor:; + if (cp->wasgp != gp) /* first time */ + { + if (mkstck(np, cp, gp) != 0) + goto err; + } + else if (cp->str == s) /* spinning */ + goto poptonext; + cp->str = s; + if (brace != 0) + { + if (cp->cnt >= brace->alt.info.num[1]) + goto poptonext; + if (++cp->cnt <= brace->alt.info.num[0]) + { + gp = gp->alt.ptr; + goto again; + } + if (cp->cnt > BRACE_MAX) + cp->cnt = BRACE_MAX; + } + if (newctxt(np, cp, gp->alt.ptr) != 0) + goto err; + poptonext:; + cp->wasgp = 0; + if ((sp = cp->sp) != 0) /* pop stack */ + { + cp->sp = sp->prev; + cp->wasgp = sp->wasgp; + cp->str = sp->str; + cp->cnt = sp->cnt; + sp->prev = np->sp; + np->sp = sp; + } + /*FALLTHROUGH*/ + case ROP_EMPTY: + tonext:; + gp = gp->next; + goto again; + case ROP_OR: + if (newctxt(np, cp, gp->alt.ptr) != 0) + goto err; + goto tonext; + case ROP_LP: + if ((n = gp->alt.info.sub) < np->used) + { + size_t k; + + cp->rm[n].rm_so = s1 - xp->str; + cp->rm[n].rm_eo = -1; + /* + * Mark any skipped subexpressions as + * 
failing to participate in the match. + */ + if ((k = cp->nset) < n) + { + regmatch_t *rmp = &cp->rm[k]; + + for (;; rmp++) + { + rmp->rm_so = -1; + rmp->rm_eo = -1; + if (++k >= n) + break; + } + } + cp->nset = n + 1; + } + goto tonext; + case ROP_RP: + if ((n = gp->alt.info.sub) < np->used) + cp->rm[n].rm_eo = s1 - xp->str; + goto tonext; + case ROP_BOL: + if (s2 == 0) + { + if (xp->flags & REG_NOTBOL) + goto failed; + } + else if ((xp->flags & REG_NEWLINE) == 0 || *s2 != '\n') + goto failed; + goto tonext; + case ROP_EOL: + if (wc == '\0') + { + if (xp->flags & REG_NOTEOL) + goto failed; + } + else if ((xp->flags & REG_NEWLINE) == 0 || wc != '\n') + goto failed; + goto tonext; + default: /* character match */ + if (gp->op != wc) + { + if ((xp->flags & REG_ICASE) == 0 + || gp->op != to_lower(wc)) + { + goto failed; + } + } + nextwc:; + cp->gp = gp->next; + tostep:; + cpn = cp->next; + cp->next = 0; + *np->estp = cp; + np->estp = &cp->next; + if ((cp = cpn) == 0) + break; + goto loop; + case ROP_NOTNL: + if (wc == '\n') + goto failed; + /*FALLTHROUGH*/ + case ROP_ANYCH: + if (wc > '\0') + goto nextwc; + /*FALLTHROUGH*/ + case ROP_NONE: + failed:; + cpn = cp->next; + cp->next = np->avail; + np->avail = cp; + if ((cp = cpn) == 0) + break; + goto loop; + case ROP_LT: + if (s2 == 0) + { + if (xp->flags & REG_NOTBOL) + goto failed; + } + else + { + w_type pwc; + + if (wc != '_' && + !iswalnum(mb_cur_max == 1 ? btowc(wc) : wc)) + goto failed; + if (!ISONEBYTE(pwc = *s2)) + libuxre_mb2wc(&pwc, &s2[1]); + if (pwc == '_' || + iswalnum(mb_cur_max== 1 ? btowc(pwc) : pwc)) + goto failed; + } + goto tonext; + case ROP_GT: + if (wc == '_' || + iswalnum(mb_cur_max == 1 ? 
btowc(wc) : wc)) + goto failed; + goto tonext; + case ROP_BKT: + case ROP_BKTCOPY: + if (cp->wasgp == gp) /* rest of MCCE */ + { + checkspin:; + if (s1 >= cp->str) /* got it all */ + goto poptonext; + goto tostep; + } + if ((i = libuxre_bktmbexec(gp->alt.info.bkt, wc, s, + mb_cur_max)) < 0) + goto failed; + if ((n = i) == 0) /* only matched wc */ + goto nextwc; + spin:; + if (mkstck(np, cp, gp) != 0) + goto err; + cp->gp = gp; /* stay here until reach past s+n */ + cp->str = s + n; + goto tostep; + case ROP_REF: + if (cp->wasgp == gp) /* rest of matched string */ + goto checkspin; + if ((n = gp->alt.info.sub) >= cp->nset) + goto failed; + if ((len = cp->rm[n].rm_eo) < 0) + goto failed; + if ((len -= n = cp->rm[n].rm_so) == 0) + goto tonext; + if (casecmp(s1, xp, n, len, mb_cur_max) == 0) + goto failed; + if ((n = s - s1) >= len) + goto nextwc; + n = len - n; + goto spin; + case ROP_END: /* success! */ + if (xp->flags & REG_NONEMPTY) + { + if (s2 == 0) + goto failed; + } + if (xp->nmatch == 0) + goto match; + /* + * Mark any skipped subexpressions as failing to match. + */ + if ((n = cp->nset) < xp->nmatch) + { + do + { + cp->rm[n].rm_so = -1; + cp->rm[n].rm_eo = -1; + } while (++n < xp->nmatch); + } + /* + * Note the left-most match that's longest. + */ + n = cp->rm[0].rm_so; + if (rmso < 0 || n < rmso) + { + rmso = n; + record:; + memcpy(xp->match, cp->rm, + xp->nmatch * sizeof(regmatch_t)); + goto failed; + } + if (rmso < n || xp->match[0].rm_eo > cp->rm[0].rm_eo) + goto failed; + if (xp->match[0].rm_eo < cp->rm[0].rm_eo) + goto record; +#if 0 /* maximize the lengths of earlier LP...RPs */ + /* + * If both are of the same length and start + * at the same point, choose the one with + * a "longest submatch from left to right" + * where an empty string wins over a nonmatch. + */ + for (n = 1; n < xp->nmatch; n++) + { + ssize_t nlen; + + /* + * First, go with the choice that has any + * match for subexpr n. 
+ */ + len = xp->match[n].rm_eo; + nlen = cp->rm[n].rm_eo; + if (nlen < 0) + { + if (len >= 0) + break; + } + else if (len < 0) + goto record; + /* + * Both have a match; go with the longer. + */ + len -= xp->match[n].rm_so; + nlen -= cp->rm[n].rm_so; + if (nlen < len) + break; + if (nlen > len) + goto record; + } +#else /* take LP and RP as "fence posts" and maximize earlier gaps */ + /* + * If both are of the same length and start + * at the same point, choose the one with + * the larger earlier subpatterns, in which + * each rm_so and rm_eo serves as a separator. + */ + for (n = 1; n < xp->nmatch; n++) + { + ssize_t nlen; + int use; + + if (xp->flags & REG_AVOIDNULL) { + /* + * This is to to satisfy POSIX.1-2001 + * XBD pp. 172-173 ll. 6127-6129, whose + * translation is "do not match null + * expressions if there is a choice". + * See also POSIX.2 interpretation #43 + * in which the question was raised. + * + * The first subexpression of "\(x*\)*" + * must thus match the string "xxx". + */ + use = cp->rm[n].rm_eo - + cp->rm[n].rm_so >= + xp->match[n].rm_eo - + xp->match[n].rm_so || + xp->match[n].rm_so < 0; + } else + use = 1; + /* + * Choose the rightmost ROP_LP as that + * maximizes the gap from before. + */ + len = xp->match[n].rm_so; + nlen = cp->rm[n].rm_so; + if (len < nlen && use) + goto record; + if (len > nlen) + break; + /* + * The ROP_LPs are at the same point: + * Choose the rightmost ROP_RP. + */ + len = xp->match[n].rm_eo; + nlen = cp->rm[n].rm_eo; + if (len < nlen && use) + goto record; + if (len > nlen) + break; + } +#endif + goto failed; + } + /* + * Finished the current Context list. If the input string + * has been entirely scanned, we're done. Otherwise, make + * the next step list current for the next character. + * If the next step list was empty and there's an existing + * match, that's the left-most longest. 
+ */ + if (wc == '\0') + { + if (rmso >= 0) + goto match; + goto nomatch; + } + np->ecur = np->estp; + if ((np->cur = np->step) == 0) + { + if (rmso >= 0) + goto match; + np->ecur = &np->cur; /* was pointing at step */ + } + np->step = 0; + np->estp = &np->step; + } +nomatch:; + ret = REG_NOMATCH; +match:; + np->avail = 0; + for (cp = np->allcp; cp != 0; cp = cpn) + { + cpn = cp->link; + cp->next = np->avail; + np->avail = cp; + } + np->sp = 0; + for (sp = np->allsp; sp != 0; sp = spn) + { + spn = sp->link; + sp->prev = np->sp; + np->sp = sp; + } + return ret; +err:; + ret = REG_ESPACE; + goto match; +} diff --git a/libuxre/regparse.c b/libuxre/regparse.c @@ -0,0 +1,1091 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regparse.c 1.12 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. 
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include <stdlib.h> +#include <ctype.h> +#include "re.h" + +LIBUXRE_STATIC void +libuxre_regdeltree(Tree *tp, int all) +{ + if (tp == 0) + return; + if (tp->op < 0) + { + switch (KIND_ROP(tp->op)) + { + case BINARY_ROP: + libuxre_regdeltree(tp->right.ptr, all); + /*FALLTHROUGH*/ + case UNARY_ROP: + libuxre_regdeltree(tp->left.ptr, all); + break; + default: + if (tp->op == ROP_BKT && all) + { + libuxre_bktfree(tp->right.info.bkt); + free(tp->right.info.bkt); + } + break; + } + } + free(tp); +} + +LIBUXRE_STATIC Tree * +libuxre_reg1tree(w_type op, Tree *lp) +{ + Tree *tp; + + if ((tp = malloc(sizeof(Tree))) == 0) + { + if (lp != 0) + libuxre_regdeltree(lp, 1); + return 0; + } + tp->op = op; + tp->left.ptr = lp; + if (lp != 0) + lp->parent = tp; + return tp; +} + +LIBUXRE_STATIC Tree * +libuxre_reg2tree(w_type op, Tree *lp, Tree *rp) +{ + Tree *tp; + + if ((tp = malloc(sizeof(Tree))) == 0) + { + libuxre_regdeltree(lp, 1); + libuxre_regdeltree(rp, 1); + return 0; + } + tp->op = op; + tp->left.ptr = lp; + lp->parent = tp; + tp->right.ptr = rp; + rp->parent = tp; + return tp; +} + +static int +lex(Lex *lxp) +{ + size_t num; + w_type wc; + int n, mb_cur_max; + + mb_cur_max = lxp->mb_cur_max; +nextc: switch (wc = *lxp->pat++) /* interesting ones are single bytes */ + { + case '\0': + lxp->pat--; /* continue to report ROP_END */ + wc = ROP_END; + break; + case '(': + if (lxp->flags & REG_PARENS) + { + leftparen:; + /* + * Must keep track of the closed and + * yet-to-be closed groups as a list. + * Consider (()a(()b(()c(()d... in which + * at each letter another even-numbered + * group is made available, but no + * odd-numbered ones are. 
+ */ + if ((lxp->flags & REG_NOBACKREF) == 0) + { + if (lxp->nleft >= lxp->nclist) /* grow it */ + { + unsigned char *p; + + lxp->nclist += 8; /* arbitrary */ + if ((p = realloc(lxp->clist, + lxp->nclist)) == 0) + { + lxp->err = REG_ESPACE; + return -1; + } + lxp->clist = p; + } + lxp->clist[lxp->nleft] = 0; /* unavailable */ + } + lxp->nleft++; + wc = ROP_LP; + } + break; + case ')': + /* + * For REG_PARENS, only take a right paren as a close + * if there is a matching left paren. + */ + if (lxp->flags & REG_PARENS && lxp->nright < lxp->nleft) + { + lxp->nright++; + rightparen:; + /* + * The group that is being closed is the highest + * numbered as-yet-unclosed group. + */ + if ((lxp->flags & REG_NOBACKREF) == 0) + { + num = lxp->nleft; + while (lxp->clist[--num] != 0) + ; + lxp->clist[num] = 1; + } + wc = ROP_RP; + } + break; + case '.': + wc = ROP_ANYCH; + if (lxp->flags & REG_NEWLINE) + wc = ROP_NOTNL; + break; + case '*': + if (lxp->flags & REG_ADDITIVE) + { + nxtstar: switch (*lxp->pat) + { + case '+': + if ((lxp->flags & REG_PLUS) == 0) + break; + lxp->pat++; + goto nxtstar; + case '?': + if ((lxp->flags & REG_QUEST) == 0) + break; + /*FALLTHRU*/ + case '*': + lxp->pat++; + goto nxtstar; + } + } + wc = ROP_STAR; + break; + case '^': + /* + * Look "behind" to see if this is an anchor. + * Take it as an anchor if it follows an alternation + * operator. (lxp->tok is initially set to ROP_OR.) + */ + if (lxp->flags & REG_ANCHORS || lxp->tok == ROP_OR) { + if (lxp->flags & REG_ADDITIVE) + { + int optional = 0; + + nxtcar: switch (*lxp->pat) + { + case '+': + if ((lxp->flags & REG_PLUS) == 0) + break; + lxp->pat++; + goto nxtcar; + case '?': + if ((lxp->flags & REG_QUEST) == 0) + break; + /*FALLTHRU*/ + case '*': + optional = 1; + lxp->pat++; + goto nxtcar; + } + if (optional) + goto nextc; + } + wc = ROP_BOL; + } + break; + case '$': + /* + * Look ahead to see if this is an anchor, + * unless any '$' is an anchor. 
+ * Take it as an anchor if it occurs just before + * the pattern end or an alternation operator. + */ + if (lxp->flags & REG_ANCHORS || *lxp->pat == '\0' + || (lxp->flags & REG_OR && *lxp->pat == '|') + || (lxp->flags & REG_NLALT && *lxp->pat == '\n')) + { + if (lxp->flags & REG_ADDITIVE) + { + int optional = 0; + + nxtdol: switch (*lxp->pat) + { + case '+': + if ((lxp->flags & REG_PLUS) == 0) + break; + lxp->pat++; + goto nxtdol; + case '?': + if ((lxp->flags & REG_QUEST) == 0) + break; + /*FALLTHRU*/ + case '*': + optional = 1; + lxp->pat++; + goto nxtdol; + } + if (optional) + goto nextc; + } + wc = ROP_EOL; + } + break; + case '+': + if (lxp->flags & REG_PLUS) + { + wc = ROP_PLUS; + if (lxp->flags & REG_ADDITIVE) + { + nxtplus: switch (*lxp->pat) + { + case '?': + if ((lxp->flags & REG_QUEST) == 0) + break; + case '*': + wc = ROP_STAR; + /*FALLTHRU*/ + case '+': + lxp->pat++; + goto nxtplus; + } + } + } + break; + case '?': + if (lxp->flags & REG_QUEST) + { + wc = ROP_QUEST; + if (lxp->flags & REG_ADDITIVE) + { + nxtquest: switch (*lxp->pat) + { + case '+': + if ((lxp->flags & REG_PLUS) == 0) + break; + case '*': + wc = ROP_STAR; + /*FALLTHRU*/ + case '?': + lxp->pat++; + goto nxtquest; + } + } + } + break; + case '\n': + if (lxp->flags & REG_NLALT) + { + /* + * Even when newline is an alternative separator, + * it doesn't permit parenthesized subexpressions + * to include it. 
+ */ + if (lxp->nleft != lxp->nright) + { + lxp->err = REG_EPAREN; + return -1; + } + wc = ROP_OR; + } + else if (lxp->flags & REG_NEWLINE) + lxp->flags |= REG_NFA; + break; + case '|': + if (lxp->flags & REG_OR) + wc = ROP_OR; + break; + case '[': + if ((lxp->info.bkt = malloc(sizeof(Bracket))) == 0) + { + lxp->err = REG_ESPACE; + return -1; + } + if ((lxp->flags & REG_GOTBKT) == 0) /* first time */ + { + struct lc_collate *col; + + lxp->flags |= REG_GOTBKT; + lxp->bktflags = 0; + if (lxp->flags & REG_ICASE) + lxp->bktflags |= BKT_ONECASE; + if (lxp->flags & REG_NEWLINE) + lxp->bktflags |= BKT_NOTNL; + if (lxp->flags & REG_BADRANGE) + lxp->bktflags |= BKT_BADRANGE; + if (lxp->flags & REG_ODDRANGE) + lxp->bktflags |= BKT_ODDRANGE; + if (lxp->flags & REG_SEPRANGE) + lxp->bktflags |= BKT_SEPRANGE; + if (lxp->flags & REG_BKTQUOTE) + lxp->bktflags |= BKT_QUOTE; + if (lxp->flags & REG_BKTEMPTY) + lxp->bktflags |= BKT_EMPTY; + if (lxp->flags & REG_ESCNL) + lxp->bktflags |= BKT_ESCNL; + if (lxp->flags & REG_NLALT) + lxp->bktflags |= BKT_NLBAD; + if (lxp->flags & REG_ESCSEQ) + lxp->bktflags |= BKT_ESCSEQ; + if (lxp->flags & REG_BKTESCAPE) + lxp->bktflags |= BKT_ESCAPE; + if (lxp->flags & REG_NOI18N) + lxp->bktflags |= BKT_NOI18N; + if (lxp->flags & REG_OLDESC) + lxp->bktflags |= BKT_OLDESC; + if ((col = libuxre_lc_collate(0)) != 0) + { + if (col->maintbl == 0 + || col->flags & CHF_ENCODED) + { + (void)libuxre_lc_collate(col); + col = 0; + } + else if (col->flags & CHF_MULTICH) + lxp->flags |= REG_NFA; + } + lxp->col = col; + } + n = lxp->bktflags; + if (*lxp->pat == '^') + { + n |= BKT_NEGATED; + lxp->pat++; + } + lxp->info.bkt->col = lxp->col; + if ((n = libuxre_bktmbcomp(lxp->info.bkt, lxp->pat, + n, mb_cur_max)) < 0) + { + free(lxp->info.bkt); + lxp->err = -n; /* convert to REG_* errors */ + return -1; + } + /* + * NFA forced if newline can be a match and REG_NEWLINE is set. 
+ */ + if ((lxp->flags & (REG_NFA | REG_NEWLINE)) == REG_NEWLINE + && lxp->pat[-1] == '[' /* i.e., not BKT_NEGATED */ + && libuxre_bktmbexec(lxp->info.bkt, '\n', 0, 1) == 0) + { + lxp->flags |= REG_NFA; + } + lxp->pat += n; + wc = ROP_BKT; + break; + case '{': + if (lxp->flags & REG_NOBRACES || (lxp->flags & REG_BRACES) == 0) + break; + interval:; + if (!isdigit(num = *lxp->pat)) + { + badbr:; + lxp->err = REG_BADBR; + if (*lxp->pat == '\0') + lxp->err = REG_EBRACE; /* more accurate */ + return -1; + } + num -= '0'; + while (isdigit(wc = *++lxp->pat)) + { + num *= 10; + if ((num += wc - '0') > BRACE_MAX) + goto badbr; + } + lxp->info.num[0] = num; + lxp->info.num[1] = num; + if (wc == ',') + { + lxp->info.num[1] = BRACE_INF; + if (isdigit(wc = *++lxp->pat)) + { + num = wc - '0'; + while (isdigit(wc = *++lxp->pat)) + { + num *= 10; + if ((num += wc - '0') > BRACE_MAX) + goto badbr; + } + if (num < lxp->info.num[0]) + goto badbr; + lxp->info.num[1] = num; + } + } + if ((lxp->flags & REG_BRACES) == 0) + { + if (wc != '\\') + goto badbr; + wc = *++lxp->pat; + } + if (wc != '}') + goto badbr; + lxp->pat++; + wc = ROP_BRACE; + /* + * Replace interval with simpler equivalents where possible, + * even when the operators are not otherwise available. + */ + if (lxp->info.num[1] <= 1) + { + if (lxp->info.num[0] == 1) + wc = ROP_NOP; /* {1,1} is noise */ + else if (lxp->info.num[1] == 0) + wc = ROP_EMPTY; /* {0,0} is empty string */ + else + wc = ROP_QUEST; /* {0,1} is ? 
*/ + } + else if (lxp->info.num[1] == BRACE_INF) + { + if (lxp->info.num[0] == 0) + wc = ROP_STAR; + else if (lxp->info.num[0] == 1) + wc = ROP_PLUS; + else if (lxp->info.num[0] > BRACE_DFAMAX) + lxp->flags |= REG_NFA; + } + else if (lxp->info.num[1] > BRACE_DFAMAX) + { + lxp->flags |= REG_NFA; + } + break; + case '\\': + switch (wc = *lxp->pat++) + { + case '\0': + lxp->err = REG_EESCAPE; + return -1; + case '<': + if (lxp->flags & REG_ANGLES) + { + lxp->flags |= REG_NFA; + wc = ROP_LT; + } + goto out; + case '>': + if (lxp->flags & REG_ANGLES) + { + lxp->flags |= REG_NFA; + wc = ROP_GT; + } + goto out; + case '(': + if ((lxp->flags & REG_PARENS) == 0) + goto leftparen; + goto out; + case ')': + if ((lxp->flags & REG_PARENS) == 0) + { + if (++lxp->nright > lxp->nleft) + { + lxp->err = REG_EPAREN; + return -1; + } + goto rightparen; + } + goto out; + case '{': + if (lxp->flags & (REG_BRACES|REG_NOBRACES)) + goto out; + goto interval; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + num = wc - '0'; + if ((lxp->flags & REG_NOBACKREF) == 0) + { + backref:; + if (num > lxp->nleft + || lxp->clist[num - 1] == 0) + { + lxp->err = REG_ESUBREG; + return -1; + } + lxp->info.sub = num; + if (lxp->maxref < num) + lxp->maxref = num; + lxp->flags |= REG_NFA; + wc = ROP_REF; + goto out; + } + /* + * For compatibility (w/awk), permit "octal" 8 and 9. + * Already have the value of the first digit in num. + * + * If REG_OLDESC, exactly three digits must be present. 
+ */ + tryoctal:; + if ((lxp->flags & REG_ESCSEQ) == 0) + goto out; + if ((wc = *lxp->pat) >= '0' && wc <= '9') + { + num <<= 3; + num += wc - '0'; + if ((wc = *++lxp->pat) >= '0' && wc <= '9') + { + num <<= 3; + num += wc - '0'; + lxp->pat++; + } + else if (lxp->flags & REG_OLDESC) + { + lxp->pat--; + wc = lxp->pat[-1]; + goto out; + } + } + else if (lxp->flags & REG_OLDESC) + { + wc = lxp->pat[-1]; + goto out; + } + if ((wc = num) <= 0) + { + lxp->err = REG_BADESC; + return -1; + } + goto out; + case '0': + if ((lxp->flags & REG_NOBACKREF) == 0 + && (num = *lxp->pat) >= '0' && num <= '9') + { + num -= '0'; + /* + * This loop ignores wraparounds. + * Keep track of number of digits in n. + */ + n = 1; + while ((wc = *++lxp->pat) >= '0' && wc <= '9') + { + num *= 10; + num += wc - '0'; + n++; + } + if (num != 0) + goto backref; + lxp->pat -= n; + } + num = 0; + goto tryoctal; + case 'a': + if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ) + wc = '\a'; + goto out; + case 'b': + if (lxp->flags & REG_ESCSEQ) + wc = '\b'; + goto out; + case 'f': + if (lxp->flags & REG_ESCSEQ) + wc = '\f'; + goto out; + case 'n': + if (lxp->flags & (REG_ESCSEQ | REG_ESCNL)) + { + wc = '\n'; + if (lxp->flags & REG_NEWLINE) + lxp->flags |= REG_NFA; + } + goto out; + case 'r': + if (lxp->flags & REG_ESCSEQ) + wc = '\r'; + goto out; + case 't': + if (lxp->flags & REG_ESCSEQ) + wc = '\t'; + goto out; + case 'v': + if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ) + wc = '\v'; + goto out; + case 'x': + if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ + && isxdigit(num = *lxp->pat)) + { + wc = num; + num = 0; + /* + * Take as many hex digits as possible, + * ignoring overflows. + * If the result (squeezed into a w_type) + * is positive, it's okay. 
+ */ + do + { + if (isdigit(wc)) + wc -= '0'; + else if (isupper(wc)) + wc -= 'A' + 10; + else + wc -= 'a' + 10; + num <<= 4; + num |= wc; + } while (isxdigit(wc = *++lxp->pat)); + if ((wc = num) <= 0) + { + lxp->err = REG_BADESC; + return -1; + } + } + goto out; + } + /*FALLTHROUGH*/ + default: + if (!ISONEBYTE(wc)) + { + if ((n = libuxre_mb2wc(&wc, lxp->pat)) > 0) + lxp->pat += n; + else if (n < 0) + { + lxp->err = REG_ILLSEQ; + return -1; + } + } + if (lxp->flags & REG_ICASE) + wc = to_lower(wc); + break; + } +out:; + lxp->tok = wc; + return 0; +} + +static Tree *alt(Lex *); + +static Tree * +leaf(Lex *lxp) +{ + Tree *tp; + + if ((tp = malloc(sizeof(Tree))) == 0) + { + lxp->err = REG_ESPACE; + return 0; + } + switch (tp->op = lxp->tok) /* covers most cases */ + { + default: + if (tp->op < 0) + { + lxp->err = REG_BADPAT; + tp->right.ptr = 0; + goto badunary; + } + break; + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + if ((lxp->flags & REG_NOAUTOQUOTE) == 0 + && lxp->pat[-1] != '}') + { + tp->op = lxp->pat[-1]; + break; + } + /*FALLTHROUGH*/ + case ROP_BRACE: + case ROP_EMPTY: /* was {0,0} ROP_BRACE */ + case ROP_NOP: /* was {1,1} ROP_BRACE */ + lxp->err = REG_BADRPT; + badunary:; + tp->left.ptr = 0; + goto err; + case ROP_ANYCH: + case ROP_NOTNL: + break; + case ROP_BOL: + case ROP_EOL: + case ROP_LT: + case ROP_GT: + /* + * Look ahead for what would have been taken to be + * postfix operators. 
+ */ + if (lex(lxp) != 0) + goto err; + switch (lxp->tok) + { + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + if ((lxp->flags & REG_NOAUTOQUOTE) == 0 + && lxp->pat[-1] != '}') + { + lxp->tok = lxp->pat[-1]; + break; + } + /*FALLTHROUGH*/ + case ROP_BRACE: + case ROP_EMPTY: /* was {0,0} ROP_BRACE */ + case ROP_NOP: /* was {1,1} ROP_BRACE */ + lxp->err = REG_BADRPT; + goto err; + } + return tp; + case ROP_BKT: + tp->right.info.bkt = lxp->info.bkt; + break; + case ROP_REF: + tp->right.info.sub = lxp->info.sub; + break; + case ROP_LP: + tp->right.info.sub = lxp->nleft; + if (lex(lxp) != 0) + goto badunary; + if (lxp->tok == ROP_RP) /* empty parens; choice of meaning */ + { + if (lxp->flags & REG_MTPARENBAD) + { + lxp->err = REG_EMPTYPAREN; + goto badunary; + } + lxp->tok = ROP_EMPTY; + if (lxp->flags & REG_MTPARENFAIL) + lxp->tok = ROP_NONE; + if ((tp->left.ptr = libuxre_reg1tree(lxp->tok, 0)) == 0) + goto badunary; + } + else if ((tp->left.ptr = alt(lxp)) == 0) + { + if (lxp->err == REG_BADPAT) + goto parenerr; + goto badunary; + } + else if (lxp->tok != ROP_RP) + { + lxp->err = REG_BADPAT; + parenerr:; + if (lxp->nleft != lxp->nright) + lxp->err = REG_EPAREN; /* better choice */ + goto badunary; + } + tp->left.ptr->parent = tp; + break; + } + if (lex(lxp) != 0) + { + err:; + libuxre_regdeltree(tp, 1); + tp = 0; + } + return tp; +} + +static Tree * +post(Lex *lxp) +{ + Tree *lp; + + if ((lp = leaf(lxp)) == 0) + return 0; + switch (lxp->tok) + { + case ROP_EMPTY: /* this was {0,0} ROP_BRACE */ + libuxre_regdeltree(lp, 1); + lp = 0; + /*FALLTHROUGH*/ + case ROP_BRACE: + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + if ((lp = libuxre_reg1tree(lxp->tok, lp)) == 0) + { + lxp->err = REG_ESPACE; + return 0; + } + if (lxp->tok == ROP_BRACE) + lp->right.info = lxp->info; + /*FALLTHROUGH*/ + case ROP_NOP: /* this was {1,1} ROP_BRACE */ + if (lex(lxp) != 0) + { + libuxre_regdeltree(lp, 1); + return 0; + } + break; + } + return lp; +} + +static Tree * +cat(Lex *lxp) 
+{ + Tree *lp, *rp; + + if ((lp = post(lxp)) == 0) + return 0; + for (;;) + { + if (lxp->tok == ROP_OR || lxp->tok == ROP_RP + || lxp->tok == ROP_END) + { + return lp; + } + if ((rp = post(lxp)) == 0) + break; + if ((lp = libuxre_reg2tree(ROP_CAT, lp, rp)) == 0) + { + lxp->err = REG_ESPACE; + return 0; + } + } + libuxre_regdeltree(lp, 1); + return 0; +} + +static Tree * +alt(Lex *lxp) +{ + Tree *lp, *rp; + + if ((lp = cat(lxp)) == 0) + return 0; + for (;;) + { + if (lxp->tok != ROP_OR) + return lp; + if (lex(lxp) != 0) + break; + if (lxp->tok == ROP_END) + return lp; /* ignore trailing '|' */ + if ((rp = cat(lxp)) == 0) + break; + if ((lp = libuxre_reg2tree(ROP_OR, lp, rp)) == 0) + { + lxp->err = REG_ESPACE; + return 0; + } + } + libuxre_regdeltree(lp, 1); + return 0; +} + +LIBUXRE_STATIC Tree * +libuxre_regparse(Lex *lxp, const unsigned char *pat, int flags) +{ + Tree *lp, *rp; + + lp = 0; /* in case of error */ + lxp->clist = 0; + lxp->col = 0; + lxp->err = 0; + lxp->maxref = 0; + lxp->nleft = 0; + lxp->nright = 0; + lxp->nclist = 0; + lxp->mb_cur_max = MB_CUR_MAX; + if (flags & REG_OR && *pat == '|') + pat++; /* skip initial OR like egrep did */ + lxp->pat = pat; + lxp->flags = flags; + lxp->tok = ROP_OR; /* enables ^ as anchor */ + /* + * Get initial token. 
+ */ + if (lex(lxp) != 0) + { + err:; + if (lp != 0) + { + libuxre_regdeltree(lp, 1); + lp = 0; + } + if (lxp->err == 0) + lxp->err = REG_ESPACE; + goto ret; + } + if (lxp->tok == ROP_END) + { + lxp->err = REG_NOPAT; + goto err; + } + if ((lp = alt(lxp)) == 0) /* parse entire RE */ + goto err; + if (lxp->maxref != 0 || (flags & REG_NOSUB) == 0) + { + if ((lp = libuxre_reg1tree(ROP_LP, lp)) == 0) + goto err; + lp->right.info.sub = 0; + } + if ((rp = libuxre_reg1tree(ROP_END, 0)) == 0) + goto err; + if ((lp = libuxre_reg2tree(ROP_CAT, lp, rp)) == 0) + goto err; + lp->parent = 0; +ret:; + if (lxp->clist != 0) + free(lxp->clist); + return lp; +} + +#ifdef REGDEBUG + +LIBUXRE_STATIC void +libuxre_regtree(Tree *tp, int n) +{ + const char *opstr; + char buf[32]; + int kind, next; + + if (n < 0) + next = -n + 2; + else + next = n + 2; + switch (tp->op) + { + case ROP_OR: + opstr = "|"; + kind = BINARY_ROP; + break; + case ROP_CAT: + opstr = "&"; + kind = BINARY_ROP; + break; + case ROP_STAR: + opstr = "*"; + kind = UNARY_ROP; + break; + case ROP_PLUS: + opstr = "+"; + kind = UNARY_ROP; + break; + case ROP_QUEST: + opstr = "?"; + kind = UNARY_ROP; + break; + case ROP_BRACE: + opstr = buf; + if (tp->right.info.num[1] == BRACE_INF) + { + sprintf(buf, "{%u,inf}", + (unsigned)tp->right.info.num[0]); + } + else + { + sprintf(buf, "{%u,%u}", + (unsigned)tp->right.info.num[0], + (unsigned)tp->right.info.num[1]); + } + kind = UNARY_ROP; + break; + case ROP_LP: + opstr = buf; + sprintf(buf, "%lu(", (unsigned long)tp->right.info.sub); + kind = UNARY_ROP; + break; + case ROP_RP: + opstr = buf; + sprintf(buf, ")%lu", (unsigned long)tp->right.info.sub); + kind = UNARY_ROP; + break; + case ROP_NOP: + opstr = "<NOP>"; + kind = LEAF_ROP; + break; + case ROP_BOL: + opstr = "<BOL>"; + kind = LEAF_ROP; + break; + case ROP_EOL: + opstr = "<EOL>"; + kind = LEAF_ROP; + break; + case ROP_ALL: + opstr = "<ALL>"; + kind = LEAF_ROP; + break; + case ROP_ANYCH: + opstr = "<ANYCH>"; + kind = LEAF_ROP; 
+ break; + case ROP_NOTNL: + opstr = "<NOTNL>"; + kind = LEAF_ROP; + break; + case ROP_EMPTY: + opstr = "<MT>"; + kind = LEAF_ROP; + break; + case ROP_NONE: + opstr = "<NONE>"; + kind = LEAF_ROP; + break; + case ROP_BKT: + opstr = buf; + sprintf(buf, "[%#lx]", (unsigned long)tp->right.info.bkt); + kind = LEAF_ROP; + break; + case ROP_BKTCOPY: + opstr = buf; + sprintf(buf, "[%#lx]CPY", (unsigned long)tp->right.info.bkt); + kind = LEAF_ROP; + break; + case ROP_LT: + opstr = "\\<"; + kind = LEAF_ROP; + break; + case ROP_GT: + opstr = "\\>"; + kind = LEAF_ROP; + break; + case ROP_REF: + opstr = buf; + sprintf(buf, "\\%lu", (unsigned long)tp->right.info.sub); + kind = LEAF_ROP; + break; + case ROP_END: + opstr = "<END>"; + kind = LEAF_ROP; + break; + default: + opstr = buf; + if (tp->op > UCHAR_MAX) + sprintf(buf, "W%#x", tp->op); + else if (tp->op <= 0) + sprintf(buf, "UNK=%u", tp->op); + else + sprintf(buf, "%c", tp->op); + kind = LEAF_ROP; + break; + } + if (kind == BINARY_ROP) + libuxre_regtree(tp->right.ptr, -next); + printf("%*c:%s\n", next - 1, n < 0 ? 'R' : n > 0 ? 'L' : 'T', opstr); + if (kind != LEAF_ROP) + libuxre_regtree(tp->left.ptr, next); +} + +#endif /*REGDEBUG*/ diff --git a/libuxre/stubs.c b/libuxre/stubs.c @@ -0,0 +1,82 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)stubs.c 1.27 (gritter) 6/26/05 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* stubbed-out routines needed to complete the RE libc code */ + +#include "colldata.h" + +struct lc_collate * +libuxre_lc_collate(struct lc_collate *cp) +{ + static struct lc_collate curinfo = {0}; /* means CHF_ENCODED */ + + return &curinfo; +} + +#include "wcharm.h" + +LIBUXRE_STATIC int +libuxre_mb2wc(w_type *wt, const unsigned char *s) +{ + wchar_t wc; + int len; + + if ((len = mbtowc(&wc, (const char *)&s[-1], MB_LEN_MAX)) > 0) + *wt = wc; + else if (len == 0) + *wt = '\0'; + else /*if (len < 0)*/ + *wt = (w_type)WEOF; + return len > 0 ? len - 1 : len; +} + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)libuxre.sl 1.27 (gritter) 6/26/05"; +/* SLIST */ +/* +_collelem.c: * Sccsid @(#)_collelem.c 1.4 (gritter) 10/18/03 +_collmult.c: * Sccsid @(#)_collmult.c 1.4 (gritter) 9/22/03 +bracket.c: * Sccsid @(#)bracket.c 1.14 (gritter) 10/18/03 +colldata.h: * Sccsid @(#)colldata.h 1.5 (gritter) 5/1/04 +onefile.c: * Sccsid @(#)onefile.c 1.1 (gritter) 9/22/03 +re.h: * Sccsid @(#)re.h 1.15 (gritter) 2/6/05 +regcomp.c: * Sccsid @(#)regcomp.c 1.6 (gritter) 9/22/03 +regdfa.c: * Sccsid @(#)regdfa.c 1.9 (gritter) 9/22/03 +regdfa.h: * Sccsid @(#)regdfa.h 1.3 (gritter) 9/22/03 +regerror.c: * Sccsid @(#)regerror.c 1.4 (gritter) 3/29/03 +regex.h: * Sccsid @(#)regex.h 1.13 (gritter) 2/6/05 +regexec.c: * Sccsid @(#)regexec.c 1.7 (gritter) 2/6/05 +regfree.c: * Sccsid @(#)regfree.c 1.3 (gritter) 9/22/03 +regnfa.c: * Sccsid @(#)regnfa.c 1.8 (gritter) 2/6/05 +regparse.c: * Sccsid @(#)regparse.c 1.12 (gritter) 9/22/03 
+wcharm.h: * Sccsid @(#)wcharm.h 1.12 (gritter) 10/18/03 +*/ diff --git a/libuxre/wcharm.h b/libuxre/wcharm.h @@ -0,0 +1,63 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)wcharm.h 1.12 (gritter) 10/18/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* Stubbed-out wide character locale information */ + +#ifndef LIBUXRE_WCHARM_H +#define LIBUXRE_WCHARM_H + +#ifndef LIBUXRE_STATIC +#define LIBUXRE_STATIC +#endif + +#ifndef LIBUXRE_WUCHAR_T +#define LIBUXRE_WUCHAR_T +typedef unsigned int wuchar_type; +#endif + +#ifndef LIBUXRE_W_TYPE +#define LIBUXRE_W_TYPE +typedef int w_type; +#endif + +#include <wchar.h> +#include <wctype.h> +#include <stdlib.h> + +#ifdef notdef +#define ISONEBYTE(ch) ((ch), 1) + +#define libuxre_mb2wc(wp, cp) ((wp), (cp), 0) +#endif /* notdef */ + +#define ISONEBYTE(ch) (((ch) & 0200) == 0 || mb_cur_max == 1) + +#define to_lower(ch) (mb_cur_max > 1 ? towlower(ch) : tolower(ch)) +#define to_upper(ch) (mb_cur_max > 1 ? 
towupper(ch) : toupper(ch)) + +LIBUXRE_STATIC int libuxre_mb2wc(w_type *, const unsigned char *); + +#endif /* !LIBUXRE_WCHARM_H */ diff --git a/mk/libbio/NOTICE b/mk/libbio/NOTICE @@ -0,0 +1,34 @@ +This copyright NOTICE applies to all files in this directory and +subdirectories, unless another copyright notice appears in a given +file or subdirectory. If you take substantial code from this software to use in +other programs, you must somehow include with it an appropriate +copyright notice that includes the copyright notice and the other +notices below. It is fine (and often tidier) to do that in a separate +file such as NOTICE, LICENCE or COPYING. + + Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. + Revisions Copyright © 2000-2005 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +---- + +This software is also made available under the Lucent Public License +version 1.02; see http://plan9.bell-labs.com/plan9dist/license.html + diff --git a/mk/libbio/README b/mk/libbio/README @@ -0,0 +1,5 @@ +This software was packaged for Unix by Russ Cox. +Please send comments to rsc@swtch.com. + +http://swtch.com/plan9port/unix + diff --git a/mk/libbio/bbuffered.c b/mk/libbio/bbuffered.c @@ -0,0 +1,20 @@ +#include "lib9.h" +#include <bio.h> + +int +Bbuffered(Biobuf *bp) +{ + switch(bp->state) { + case Bracteof: + case Bractive: + return -bp->icount; + + case Bwactive: + return bp->bsize + bp->ocount; + + case Binactive: + return 0; + } + fprint(2, "Bbuffered: unknown state %d\n", bp->state); + return 0; +} diff --git a/mk/libbio/bcat.c b/mk/libbio/bcat.c @@ -0,0 +1,46 @@ +#include <fmt.h> +#include "bio.h" + +Biobuf bout; + +void +bcat(Biobuf *b, char *name) +{ + char buf[1000]; + int n; + + while((n = Bread(b, buf, sizeof buf)) > 0){ + if(Bwrite(&bout, buf, n) < 0) + fprint(2, "writing during %s: %r\n", name); + } + if(n < 0) + fprint(2, "reading %s: %r\n", name); +} + +int +main(int argc, char **argv) +{ + int i; + Biobuf b, *bp; + Fmt fmt; + + Binit(&bout, 1, O_WRONLY); + Bfmtinit(&fmt, &bout); + fmtprint(&fmt, "hello, world\n"); + Bfmtflush(&fmt); + + if(argc == 1){ + Binit(&b, 0, O_RDONLY); + bcat(&b, "<stdin>"); + }else{ + for(i=1; i<argc; i++){ + if((bp = Bopen(argv[i], O_RDONLY)) == 0){ + fprint(2, "Bopen %s: %r\n", argv[i]); + continue; + } + bcat(bp, argv[i]); + Bterm(bp); + } + } + exit(0); +} diff --git a/mk/libbio/bfildes.c b/mk/libbio/bfildes.c @@ -0,0 +1,9 @@ +#include "lib9.h" +#include <bio.h> + +int +Bfildes(Biobuf *bp) +{ + + return bp->fid; +} diff --git a/mk/libbio/bflush.c b/mk/libbio/bflush.c @@ -0,0 +1,33 @@ +#include "lib9.h" +#include <bio.h> + +int +Bflush(Biobuf *bp) +{ + int n, c; + + switch(bp->state) { + case Bwactive: + n = bp->bsize+bp->ocount; + if(n == 0) + return 0; + c = write(bp->fid, bp->bbuf, n); + if(n == c) { + 
bp->offset += n; + bp->ocount = -bp->bsize; + return 0; + } + bp->state = Binactive; + bp->ocount = 0; + break; + + case Bracteof: + bp->state = Bractive; + + case Bractive: + bp->icount = 0; + bp->gbuf = bp->ebuf; + return 0; + } + return Beof; +} diff --git a/mk/libbio/bgetc.c b/mk/libbio/bgetc.c @@ -0,0 +1,53 @@ +#include "lib9.h" +#include <bio.h> + +int +Bgetc(Biobuf *bp) +{ + int i; + +loop: + i = bp->icount; + if(i != 0) { + bp->icount = i+1; + return bp->ebuf[i]; + } + if(bp->state != Bractive) { + if(bp->state == Bracteof) + bp->state = Bractive; + return Beof; + } + /* + * get next buffer, try to keep Bungetsize + * characters pre-catenated from the previous + * buffer to allow that many ungets. + */ + memmove(bp->bbuf-Bungetsize, bp->ebuf-Bungetsize, Bungetsize); + i = read(bp->fid, bp->bbuf, bp->bsize); + bp->gbuf = bp->bbuf; + if(i <= 0) { + bp->state = Bracteof; + if(i < 0) + bp->state = Binactive; + return Beof; + } + if(i < bp->bsize) { + memmove(bp->ebuf-i-Bungetsize, bp->bbuf-Bungetsize, i+Bungetsize); + bp->gbuf = bp->ebuf-i; + } + bp->icount = -i; + bp->offset += i; + goto loop; +} + +int +Bungetc(Biobuf *bp) +{ + + if(bp->state == Bracteof) + bp->state = Bractive; + if(bp->state != Bractive) + return Beof; + bp->icount--; + return 1; +} diff --git a/mk/libbio/bgetd.c b/mk/libbio/bgetd.c @@ -0,0 +1,36 @@ +#include "lib9.h" +#include <bio.h> + +struct bgetd +{ + Biobuf* b; + int eof; +}; + +static int +Bgetdf(void *vp) +{ + int c; + struct bgetd *bg = vp; + + c = Bgetc(bg->b); + if(c == Beof) + bg->eof = 1; + return c; +} + +int +Bgetd(Biobuf *bp, double *dp) +{ + double d; + struct bgetd b; + + b.b = bp; + b.eof = 0; + d = fmtcharstod(Bgetdf, &b); + if(b.eof) + return -1; + Bungetc(bp); + *dp = d; + return 1; +} diff --git a/mk/libbio/bgetrune.c b/mk/libbio/bgetrune.c @@ -0,0 +1,47 @@ +#include "lib9.h" +#include <bio.h> +#include <utf.h> + +long +Bgetrune(Biobuf *bp) +{ + int c, i; + Rune rune; + char str[UTFmax]; + + c = Bgetc(bp); + if(c < 
Runeself) { /* one char */ + bp->runesize = 1; + return c; + } + str[0] = c; + + for(i=1;;) { + c = Bgetc(bp); + if(c < 0) + return c; + str[i++] = c; + + if(fullrune(str, i)) { + bp->runesize = chartorune(&rune, str); + while(i > bp->runesize) { + Bungetc(bp); + i--; + } + return rune; + } + } +} + +int +Bungetrune(Biobuf *bp) +{ + + if(bp->state == Bracteof) + bp->state = Bractive; + if(bp->state != Bractive) + return Beof; + bp->icount -= bp->runesize; + bp->runesize = 0; + return 1; +} diff --git a/mk/libbio/binit.c b/mk/libbio/binit.c @@ -0,0 +1,153 @@ +#include "lib9.h" +#include <bio.h> + +enum +{ + MAXBUFS = 20 +}; + +static Biobuf* wbufs[MAXBUFS]; +static int atexitflag; + +static +void +batexit(void) +{ + Biobuf *bp; + int i; + + for(i=0; i<MAXBUFS; i++) { + bp = wbufs[i]; + if(bp != 0) { + wbufs[i] = 0; + Bflush(bp); + } + } +} + +static +void +deinstall(Biobuf *bp) +{ + int i; + + for(i=0; i<MAXBUFS; i++) + if(wbufs[i] == bp) + wbufs[i] = 0; +} + +static +void +install(Biobuf *bp) +{ + int i; + + deinstall(bp); + for(i=0; i<MAXBUFS; i++) + if(wbufs[i] == 0) { + wbufs[i] = bp; + break; + } + if(atexitflag == 0) { + atexitflag = 1; + atexit(batexit); + } +} + +int +Binits(Biobuf *bp, int f, int mode, unsigned char *p, int size) +{ + + p += Bungetsize; /* make room for Bungets */ + size -= Bungetsize; + + switch(mode&~(OCEXEC|ORCLOSE|OTRUNC)) { + default: + fprint(2, "Bopen: unknown mode %d\n", mode); + return Beof; + + case OREAD: + bp->state = Bractive; + bp->ocount = 0; + break; + + case OWRITE: + install(bp); + bp->state = Bwactive; + bp->ocount = -size; + break; + } + bp->bbuf = p; + bp->ebuf = p+size; + bp->bsize = size; + bp->icount = 0; + bp->gbuf = bp->ebuf; + bp->fid = f; + bp->flag = 0; + bp->rdline = 0; + bp->offset = 0; + bp->runesize = 0; + return 0; +} + + +int +Binit(Biobuf *bp, int f, int mode) +{ + return Binits(bp, f, mode, bp->b, sizeof(bp->b)); +} + +Biobuf* +Bfdopen(int f, int mode) +{ + Biobuf *bp; + + bp = malloc(sizeof(Biobuf)); + 
if(bp == 0) + return 0; + Binits(bp, f, mode, bp->b, sizeof(bp->b)); + bp->flag = Bmagic; + return bp; +} + +Biobuf* +Bopen(char *name, int mode) +{ + Biobuf *bp; + int f; + + switch(mode&~(OCEXEC|ORCLOSE|OTRUNC)) { + default: + fprint(2, "Bopen: unknown mode %d\n", mode); + return 0; + + case OREAD: + f = open(name, mode); + if(f < 0) + return 0; + break; + + case OWRITE: + f = create(name, mode, 0666); + if(f < 0) + return 0; + } + bp = Bfdopen(f, mode); + if(bp == 0) + close(f); + return bp; +} + +int +Bterm(Biobuf *bp) +{ + + deinstall(bp); + Bflush(bp); + if(bp->flag == Bmagic) { + bp->flag = 0; + close(bp->fid); + free(bp); + } + return 0; +} diff --git a/mk/libbio/bio.3 b/mk/libbio/bio.3 @@ -0,0 +1,371 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH BIO 3 +.SH NAME +Bopen, Bfdopen, Binit, Binits, Brdline, Brdstr, Bgetc, Bgetrune, Bgetd, Bungetc, Bungetrune, Bread, Bseek, Boffset, Bfildes, Blinelen, Bputc, Bputrune, Bprint, Bvprint, Bwrite, Bflush, Bterm, Bbuffered \- buffered input/output +.SH SYNOPSIS +.ta \w'\fLBiobuf* 'u +.B #include <utf.h> +.br +.B #include <fmt.h> +.br +.B #include <bio.h> +.PP +.B +Biobuf* Bopen(char *file, int mode) +.PP +.B +Biobuf* Bfdopen(int fd, int mode) +.PP +.B +int Binit(Biobuf *bp, int fd, int mode) +.PP +.B +int Binits(Biobufhdr *bp, int fd, int mode, uchar *buf, int size) +.PP +.B +int Bterm(Biobufhdr *bp) +.PP +.B +int Bprint(Biobufhdr *bp, char *format, ...) 
+.PP +.B +int Bvprint(Biobufhdr *bp, char *format, va_list arglist); +.PP +.B +void* Brdline(Biobufhdr *bp, int delim) +.PP +.B +char* Brdstr(Biobufhdr *bp, int delim, int nulldelim) +.PP +.B +int Blinelen(Biobufhdr *bp) +.PP +.B +vlong Boffset(Biobufhdr *bp) +.PP +.B +int Bfildes(Biobufhdr *bp) +.PP +.B +int Bgetc(Biobufhdr *bp) +.PP +.B +long Bgetrune(Biobufhdr *bp) +.PP +.B +int Bgetd(Biobufhdr *bp, double *d) +.PP +.B +int Bungetc(Biobufhdr *bp) +.PP +.B +int Bungetrune(Biobufhdr *bp) +.PP +.B +vlong Bseek(Biobufhdr *bp, vlong n, int type) +.PP +.B +int Bputc(Biobufhdr *bp, int c) +.PP +.B +int Bputrune(Biobufhdr *bp, long c) +.PP +.B +long Bread(Biobufhdr *bp, void *addr, long nbytes) +.PP +.B +long Bwrite(Biobufhdr *bp, void *addr, long nbytes) +.PP +.B +int Bflush(Biobufhdr *bp) +.PP +.B +int Bbuffered(Biobufhdr *bp) +.PP +.SH DESCRIPTION +These routines implement fast buffered I/O. +I/O on different file descriptors is independent. +.PP +.I Bopen +opens +.I file +for mode +.B O_RDONLY +or creates for mode +.BR O_WRONLY . +It calls +.IR malloc (3) +to allocate a buffer. +.PP +.I Bfdopen +allocates a buffer for the already-open file descriptor +.I fd +for mode +.B O_RDONLY +or +.BR O_WRONLY . +It calls +.IR malloc (3) +to allocate a buffer. +.PP +.I Binit +initializes a standard size buffer, type +.IR Biobuf , +with the open file descriptor passed in +by the user. +.I Binits +initializes a non-standard size buffer, type +.IR Biobufhdr , +with the open file descriptor, +buffer area, and buffer size passed in +by the user. +.I Biobuf +and +.I Biobufhdr +are related by the declaration: +.IP +.EX +typedef struct Biobuf Biobuf; +struct Biobuf +{ + Biobufhdr; + uchar b[Bungetsize+Bsize]; +}; +.EE +.PP +Arguments +of types pointer to Biobuf and pointer to Biobufhdr +can be used interchangeably in the following routines. +.PP +.IR Bopen , +.IR Binit , +or +.I Binits +should be called before any of the +other routines on that buffer. 
+.I Bfildes +returns the integer file descriptor of the associated open file. +.PP +.I Bterm +flushes the buffer for +.IR bp . +If the buffer was allocated by +.IR Bopen , +the buffer is +.I freed +and the file is closed. +.PP +.I Brdline +reads a string from the file associated with +.I bp +up to and including the first +.I delim +character. +The delimiter character at the end of the line is +not altered. +.I Brdline +returns a pointer to the start of the line or +.L 0 +on end-of-file or read error. +.I Blinelen +returns the length (including the delimiter) +of the most recent string returned by +.IR Brdline . +.PP +.I Brdstr +returns a +.IR malloc (3)-allocated +buffer containing the next line of input delimited by +.IR delim , +terminated by a NUL (0) byte. +Unlike +.IR Brdline , +which returns when its buffer is full even if no delimiter has been found, +.I Brdstr +will return an arbitrarily long line in a single call. +If +.I nulldelim +is set, the terminal delimiter will be overwritten with a NUL. +After a successful call to +.IR Brdstr , +the return value of +.I Blinelen +will be the length of the returned buffer, excluding the NUL. +.PP +.I Bgetc +returns the next character from +.IR bp , +or a negative value +at end of file. +.I Bungetc +may be called immediately after +.I Bgetc +to allow the same character to be reread. +.PP +.I Bgetrune +calls +.I Bgetc +to read the bytes of the next +.SM UTF +sequence in the input stream and returns the value of the rune +represented by the sequence. +It returns a negative value +at end of file. +.I Bungetrune +may be called immediately after +.I Bgetrune +to allow the same +.SM UTF +sequence to be reread as either bytes or a rune. +.I Bungetc +and +.I Bungetrune +may back up a maximum of five bytes. +.PP +.I Bgetd +uses +.I fmtcharstod +(see +.IR fmtstrtod (3)) +and +.I Bgetc +to read the formatted +floating-point number in the input stream, +skipping initial blanks and tabs. +The value is stored in +.BR *d. 
+.PP +.I Bread +reads +.I nbytes +of data from +.I bp +into memory starting at +.IR addr . +The number of bytes read is returned on success +and a negative value is returned if a read error occurred. +.PP +.I Bseek +applies +.IR lseek (2) +to +.IR bp . +It returns the new file offset. +.I Boffset +returns the file offset of the next character to be processed. +.PP +.I Bputc +outputs the low order 8 bits of +.I c +on +.IR bp . +If this causes a +.IR write +to occur and there is an error, +a negative value is returned. +Otherwise, a zero is returned. +.PP +.I Bputrune +calls +.I Bputc +to output the low order +16 bits of +.I c +as a rune +in +.SM UTF +format +on the output stream. +.PP +.I Bprint +is a buffered interface to +.IR print (3). +If this causes a +.IR write +to occur and there is an error, +a negative value +.RB ( Beof ) +is returned. +Otherwise, the number of bytes output is returned. +.I Bvprint +does the same except it takes as argument a +.B va_list +parameter, so it can be called within a variadic function. +.PP +.I Bwrite +outputs +.I nbytes +of data starting at +.I addr +to +.IR bp . +If this causes a +.IR write +to occur and there is an error, +a negative value is returned. +Otherwise, the number of bytes written is returned. +.PP +.I Bflush +causes any buffered output associated with +.I bp +to be written. +The return is as for +.IR Bputc . +.I Bflush +is called on +exit for every buffer still open +for writing. +.PP +.I Bbuffered +returns the number of bytes in the buffer. +When reading, this is the number of bytes still available from the last +read on the file; when writing, it is the number of bytes ready to be +written. +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH SEE ALSO +.IR open (2), +.IR print (3), +.IR atexit (3), +.IR utf (7) +.SH DIAGNOSTICS +.I Bio +routines that return integers yield +.B Beof +if +.I bp +is not the descriptor of an open file. +.I Bopen +returns zero if the file cannot be opened in the given mode.
+All routines set +.I errstr +on error. +.SH BUGS +.I Brdline +returns an error on strings longer than the buffer associated +with the file +and also if the end-of-file is encountered +before a delimiter. +.I Blinelen +will tell how many characters are available +in these cases. +In the case of a true end-of-file, +.I Blinelen +will return zero. +At the cost of allocating a buffer, +.I Brdstr +sidesteps these issues. +.PP +The data returned by +.I Brdline +may be overwritten by calls to any other +.I bio +routine on the same +.IR bp. diff --git a/mk/libbio/bio.h b/mk/libbio/bio.h @@ -0,0 +1,91 @@ +#ifndef _BIO_H_ +#define _BIO_H_ 1 +#if defined(__cplusplus) +extern "C" { +#endif + +#ifdef AUTOLIB +AUTOLIB(bio) +#endif + +#include <fcntl.h> /* for O_RDONLY, O_WRONLY */ + +typedef struct Biobuf Biobuf; + +enum +{ + Bsize = 8*1024, + Bungetsize = 4, /* space for ungetc */ + Bmagic = 0x314159, + Beof = -1, + Bbad = -2, + + Binactive = 0, /* states */ + Bractive, + Bwactive, + Bracteof, + + Bend +}; + +struct Biobuf +{ + int icount; /* neg num of bytes at eob */ + int ocount; /* num of bytes at bob */ + int rdline; /* num of bytes after rdline */ + int runesize; /* num of bytes of last getrune */ + int state; /* r/w/inactive */ + int fid; /* open file */ + int flag; /* magic if malloc'ed */ + long long offset; /* offset of buffer in file */ + int bsize; /* size of buffer */ + unsigned char* bbuf; /* pointer to beginning of buffer */ + unsigned char* ebuf; /* pointer to end of buffer */ + unsigned char* gbuf; /* pointer to good data in buf */ + unsigned char b[Bungetsize+Bsize]; +}; + +#define BGETC(bp)\ + ((bp)->icount?(bp)->bbuf[(bp)->bsize+(bp)->icount++]:Bgetc((bp))) +#define BPUTC(bp,c)\ + ((bp)->ocount?(bp)->bbuf[(bp)->bsize+(bp)->ocount++]=(c),0:Bputc((bp),(c))) +#define BOFFSET(bp)\ + (((bp)->state==Bractive)?\ + (bp)->offset + (bp)->icount:\ + (((bp)->state==Bwactive)?\ + (bp)->offset + ((bp)->bsize + (bp)->ocount):\ + -1)) +#define BLINELEN(bp)\ + (bp)->rdline 
+#define BFILDES(bp)\ + (bp)->fid + +int Bbuffered(Biobuf*); +Biobuf* Bfdopen(int, int); +int Bfildes(Biobuf*); +int Bflush(Biobuf*); +int Bgetc(Biobuf*); +int Bgetd(Biobuf*, double*); +long Bgetrune(Biobuf*); +int Binit(Biobuf*, int, int); +int Binits(Biobuf*, int, int, unsigned char*, int); +int Blinelen(Biobuf*); +long long Boffset(Biobuf*); +Biobuf* Bopen(char*, int); +int Bprint(Biobuf*, char*, ...); +int Bputc(Biobuf*, int); +int Bputrune(Biobuf*, long); +void* Brdline(Biobuf*, int); +char* Brdstr(Biobuf*, int, int); +long Bread(Biobuf*, void*, long); +long long Bseek(Biobuf*, long long, int); +int Bterm(Biobuf*); +int Bungetc(Biobuf*); +int Bungetrune(Biobuf*); +long Bwrite(Biobuf*, void*, long); +int Bvprint(Biobuf*, char*, va_list); + +#if defined(__cplusplus) +} +#endif +#endif diff --git a/mk/libbio/boffset.c b/mk/libbio/boffset.c @@ -0,0 +1,25 @@ +#include "lib9.h" +#include <bio.h> + +vlong +Boffset(Biobuf *bp) +{ + vlong n; + + switch(bp->state) { + default: + fprint(2, "Boffset: unknown state %d\n", bp->state); + n = Beof; + break; + + case Bracteof: + case Bractive: + n = bp->offset + bp->icount; + break; + + case Bwactive: + n = bp->offset + (bp->bsize + bp->ocount); + break; + } + return n; +} diff --git a/mk/libbio/bprint.c b/mk/libbio/bprint.c @@ -0,0 +1,14 @@ +#include "lib9.h" +#include <bio.h> + +int +Bprint(Biobuf *bp, char *fmt, ...) 
+{ + int n; + va_list arg; + + va_start(arg, fmt); + n = Bvprint(bp, fmt, arg); + va_end(arg); + return n; +} diff --git a/mk/libbio/bputc.c b/mk/libbio/bputc.c @@ -0,0 +1,20 @@ +#include "lib9.h" +#include <bio.h> + +int +Bputc(Biobuf *bp, int c) +{ + int i; + + for(;;) { + i = bp->ocount; + if(i) { + bp->ebuf[i++] = c; + bp->ocount = i; + return 0; + } + if(Bflush(bp) == Beof) + break; + } + return Beof; +} diff --git a/mk/libbio/bputrune.c b/mk/libbio/bputrune.c @@ -0,0 +1,23 @@ +#include "lib9.h" +#include <bio.h> +#include <utf.h> + +int +Bputrune(Biobuf *bp, long c) +{ + Rune rune; + char str[UTFmax]; + int n; + + rune = c; + if(rune < Runeself) { + Bputc(bp, rune); + return 1; + } + n = runetochar(str, &rune); + if(n == 0) + return Bbad; + if(Bwrite(bp, str, n) != n) + return Beof; + return n; +} diff --git a/mk/libbio/brdline.c b/mk/libbio/brdline.c @@ -0,0 +1,94 @@ +#include "lib9.h" +#include <bio.h> + +void* +Brdline(Biobuf *bp, int delim) +{ + char *ip, *ep; + int i, j; + + i = -bp->icount; + if(i == 0) { + /* + * eof or other error + */ + if(bp->state != Bractive) { + if(bp->state == Bracteof) + bp->state = Bractive; + bp->rdline = 0; + bp->gbuf = bp->ebuf; + return 0; + } + } + + /* + * first try in remainder of buffer (gbuf doesn't change) + */ + ip = (char*)bp->ebuf - i; + ep = memchr(ip, delim, i); + if(ep) { + j = (ep - ip) + 1; + bp->rdline = j; + bp->icount += j; + return ip; + } + + /* + * copy data to beginning of buffer + */ + if(i < bp->bsize) + memmove(bp->bbuf, ip, i); + bp->gbuf = bp->bbuf; + + /* + * append to buffer looking for the delim + */ + ip = (char*)bp->bbuf + i; + while(i < bp->bsize) { + j = read(bp->fid, ip, bp->bsize-i); + if(j <= 0) { + /* + * end of file with no delim + */ + memmove(bp->ebuf-i, bp->bbuf, i); + bp->rdline = i; + bp->icount = -i; + bp->gbuf = bp->ebuf-i; + return 0; + } + bp->offset += j; + i += j; + ep = memchr(ip, delim, j); + if(ep) { + /* + * found in new piece + * copy back up and reset everything + */ + 
ip = (char*)bp->ebuf - i; + if(i < bp->bsize){ + memmove(ip, bp->bbuf, i); + bp->gbuf = (unsigned char*)ip; + } + j = (ep - (char*)bp->bbuf) + 1; + bp->rdline = j; + bp->icount = j - i; + return ip; + } + ip += j; + } + + /* + * full buffer without finding + */ + bp->rdline = bp->bsize; + bp->icount = -bp->bsize; + bp->gbuf = bp->bbuf; + return 0; +} + +int +Blinelen(Biobuf *bp) +{ + + return bp->rdline; +} diff --git a/mk/libbio/brdstr.c b/mk/libbio/brdstr.c @@ -0,0 +1,111 @@ +#include "lib9.h" +#include <bio.h> + +static char* +badd(char *p, int *np, char *data, int ndata, int delim, int nulldelim) +{ + int n; + + n = *np; + p = realloc(p, n+ndata+1); + if(p){ + memmove(p+n, data, ndata); + n += ndata; + if(n>0 && nulldelim && p[n-1]==delim) + p[--n] = '\0'; + else + p[n] = '\0'; + *np = n; + } + return p; +} + +char* +Brdstr(Biobuf *bp, int delim, int nulldelim) +{ + char *ip, *ep, *p; + int i, j; + + i = -bp->icount; + bp->rdline = 0; + if(i == 0) { + /* + * eof or other error + */ + if(bp->state != Bractive) { + if(bp->state == Bracteof) + bp->state = Bractive; + bp->gbuf = bp->ebuf; + return nil; + } + } + + /* + * first try in remainder of buffer (gbuf doesn't change) + */ + ip = (char*)bp->ebuf - i; + ep = memchr(ip, delim, i); + if(ep) { + j = (ep - ip) + 1; + bp->icount += j; + return badd(nil, &bp->rdline, ip, j, delim, nulldelim); + } + + /* + * copy data to beginning of buffer + */ + if(i < bp->bsize) + memmove(bp->bbuf, ip, i); + bp->gbuf = bp->bbuf; + + /* + * append to buffer looking for the delim + */ + p = nil; + for(;;){ + ip = (char*)bp->bbuf + i; + while(i < bp->bsize) { + j = read(bp->fid, ip, bp->bsize-i); + if(j <= 0 && i == 0) + return p; + if(j <= 0 && i > 0){ + /* + * end of file but no delim. pretend we got a delim + * by making the delim \0 and smashing it with nulldelim. 
+ */ + j = 1; + ep = ip; + delim = '\0'; + nulldelim = 1; + *ep = delim; /* there will be room for this */ + }else{ + bp->offset += j; + ep = memchr(ip, delim, j); + } + i += j; + if(ep) { + /* + * found in new piece + * copy back up and reset everything + */ + ip = (char*)bp->ebuf - i; + if(i < bp->bsize){ + memmove(ip, bp->bbuf, i); + bp->gbuf = (unsigned char*)ip; + } + j = (ep - (char*)bp->bbuf) + 1; + bp->icount = j - i; + return badd(p, &bp->rdline, ip, j, delim, nulldelim); + } + ip += j; + } + + /* + * full buffer without finding; add to user string and continue + */ + p = badd(p, &bp->rdline, (char*)bp->bbuf, bp->bsize, 0, 0); + i = 0; + bp->icount = 0; + bp->gbuf = bp->ebuf; + } +} diff --git a/mk/libbio/bread.c b/mk/libbio/bread.c @@ -0,0 +1,45 @@ +#include "lib9.h" +#include <bio.h> + +long +Bread(Biobuf *bp, void *ap, long count) +{ + long c; + unsigned char *p; + int i, n, ic; + + p = ap; + c = count; + ic = bp->icount; + + while(c > 0) { + n = -ic; + if(n > c) + n = c; + if(n == 0) { + if(bp->state != Bractive) + break; + i = read(bp->fid, bp->bbuf, bp->bsize); + if(i <= 0) { + bp->state = Bracteof; + if(i < 0) + bp->state = Binactive; + break; + } + bp->gbuf = bp->bbuf; + bp->offset += i; + if(i < bp->bsize) { + memmove(bp->ebuf-i, bp->bbuf, i); + bp->gbuf = bp->ebuf-i; + } + ic = -i; + continue; + } + memmove(p, bp->ebuf+ic, n); + c -= n; + ic += n; + p += n; + } + bp->icount = ic; + return count-c; +} diff --git a/mk/libbio/bseek.c b/mk/libbio/bseek.c @@ -0,0 +1,60 @@ +#include "lib9.h" +#include <bio.h> + +long long +Bseek(Biobuf *bp, long long offset, int base) +{ + vlong n, d; + int bufsz; + + switch(bp->state) { + default: + fprint(2, "Bseek: unknown state %d\n", bp->state); + return Beof; + + case Bracteof: + bp->state = Bractive; + bp->icount = 0; + bp->gbuf = bp->ebuf; + + case Bractive: + n = offset; + if(base == 1) { + n += Boffset(bp); + base = 0; + } + + /* + * try to seek within buffer + */ + if(base == 0) { + d = n - Boffset(bp); + 
bufsz = bp->ebuf - bp->gbuf; + if(-bufsz <= d && d <= bufsz){ + bp->icount += d; + if(d >= 0) { + if(bp->icount <= 0) + return n; + } else { + if(bp->ebuf - bp->gbuf >= -bp->icount) + return n; + } + } + } + + /* + * reset the buffer + */ + n = lseek(bp->fid, n, base); + bp->icount = 0; + bp->gbuf = bp->ebuf; + break; + + case Bwactive: + Bflush(bp); + n = seek(bp->fid, offset, base); + break; + } + bp->offset = n; + return n; +} diff --git a/mk/libbio/bvprint.c b/mk/libbio/bvprint.c @@ -0,0 +1,38 @@ +#include "lib9.h" +#include <bio.h> + +static int +fmtBflush(Fmt *f) +{ + Biobuf *bp; + + bp = f->farg; + bp->ocount = (char*)f->to - (char*)f->stop; + if(Bflush(bp) < 0) + return 0; + f->stop = bp->ebuf; + f->to = (char*)f->stop + bp->ocount; + f->start = f->to; + return 1; +} + +int +Bvprint(Biobuf *bp, char *fmt, va_list arg) +{ + int n; + Fmt f; + + f.runes = 0; + f.stop = bp->ebuf; + f.start = (char*)f.stop + bp->ocount; + f.to = f.start; + f.flush = fmtBflush; + f.farg = bp; + f.nfmt = 0; + fmtlocaleinit(&f, nil, nil, nil); + n = fmtvprint(&f, fmt, arg); + bp->ocount = (char*)f.to - (char*)f.stop; + if(n == 0) + n = f.nfmt; + return n; +} diff --git a/mk/libbio/bwrite.c b/mk/libbio/bwrite.c @@ -0,0 +1,38 @@ +#include "lib9.h" +#include <bio.h> + +long +Bwrite(Biobuf *bp, void *ap, long count) +{ + long c; + unsigned char *p; + int i, n, oc; + + p = ap; + c = count; + oc = bp->ocount; + + while(c > 0) { + n = -oc; + if(n > c) + n = c; + if(n == 0) { + if(bp->state != Bwactive) + return Beof; + i = write(bp->fid, bp->bbuf, bp->bsize); + if(i != bp->bsize) { + bp->state = Binactive; + return Beof; + } + bp->offset += i; + oc = -bp->bsize; + continue; + } + memmove(bp->ebuf+oc, p, n); + oc += n; + c -= n; + p += n; + } + bp->ocount = oc; + return count-c; +} diff --git a/mk/libbio/depsinc.mk b/mk/libbio/depsinc.mk @@ -0,0 +1,2 @@ +DEPS_CFLAGS = $DEPS_CFLAGS -I$libbio_DEPDIR +DEPS_LDFLAGS = $DEPS_LDFLAGS -L$libbio_DEPDIR -lbio diff --git a/mk/libbio/lib9.h 
b/mk/libbio/lib9.h @@ -0,0 +1,26 @@ +#define _FILE_OFFSET_BITS 64 +#define _LARGEFILE64_SOURCE + +#include <utf.h> +#include <fmt.h> + +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <stdlib.h> + +#define OREAD O_RDONLY +#define OWRITE O_WRONLY + +#define OCEXEC 0 +#define ORCLOSE 0 +#define OTRUNC 0 + +#define nil ((void*)0) + +typedef long long vlong; +typedef unsigned long long uvlong; + +#define seek(fd, offset, whence) lseek(fd, offset, whence) +#define create(name, mode, perm) creat(name, perm) + diff --git a/mk/libbio/mkfile b/mk/libbio/mkfile @@ -0,0 +1,23 @@ +LIB = libbio.a +LOBJ = bbuffered.o \ + bfildes.o \ + bflush.o \ + bgetc.o \ + bgetd.o \ + bgetrune.o \ + binit.o \ + boffset.o \ + bprint.o \ + bvprint.o \ + bputc.o \ + bputrune.o \ + brdline.o \ + brdstr.o \ + bread.o \ + bseek.o bwrite.o + +LOCAL_CFLAGS = -I"$PREFIX"/include -I. +DEPS = libutf libfmt + +<$mkbuild/mk.default + diff --git a/mk/libfmt/NOTICE b/mk/libfmt/NOTICE @@ -0,0 +1,25 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. +*/ + +This is a Unix port of the Plan 9 formatted I/O package. + +Please send comments about the packaging +to Russ Cox <rsc@swtch.com>. 
+ + +---- + +This software is also made available under the Lucent Public License +version 1.02; see http://plan9.bell-labs.com/plan9dist/license.html + diff --git a/mk/libfmt/README b/mk/libfmt/README @@ -0,0 +1,5 @@ +This software was packaged for Unix by Russ Cox. +Please send comments to rsc@swtch.com. + +http://swtch.com/plan9port/unix + diff --git a/mk/libfmt/charstod.c b/mk/libfmt/charstod.c @@ -0,0 +1,73 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * Reads a floating-point number by interpreting successive characters + * returned by (*f)(vp). The last call it makes to f terminates the + * scan, so is not a character in the number. It may therefore be + * necessary to back up the input stream up one byte after calling charstod. + */ + +double +fmtcharstod(int(*f)(void*), void *vp) +{ + double num, dem; + int neg, eneg, dig, exp, c; + + num = 0; + neg = 0; + dig = 0; + exp = 0; + eneg = 0; + + c = (*f)(vp); + while(c == ' ' || c == '\t') + c = (*f)(vp); + if(c == '-' || c == '+'){ + if(c == '-') + neg = 1; + c = (*f)(vp); + } + while(c >= '0' && c <= '9'){ + num = num*10 + c-'0'; + c = (*f)(vp); + } + if(c == '.') + c = (*f)(vp); + while(c >= '0' && c <= '9'){ + num = num*10 + c-'0'; + dig++; + c = (*f)(vp); + } + if(c == 'e' || c == 'E'){ + c = (*f)(vp); + if(c == '-' || c == '+'){ + if(c == '-'){ + dig = -dig; + eneg = 1; + } + c = (*f)(vp); + } + while(c >= '0' && c <= '9'){ + exp = exp*10 + c-'0'; + c = (*f)(vp); + } + } + exp -= dig; + if(exp < 0){ + exp = -exp; + eneg = !eneg; + } + dem = __fmtpow10(exp); + if(eneg) + num /= dem; + else + num *= dem; + if(neg) + return -num; + return num; +} diff --git a/mk/libfmt/depsinc.mk b/mk/libfmt/depsinc.mk @@ -0,0 +1,2 @@ +DEPS_CFLAGS = $DEPS_CFLAGS -I$libfmt_DEPDIR +DEPS_LDFLAGS = $DEPS_LDFLAGS -L$libfmt_DEPDIR -lfmt diff --git a/mk/libfmt/dofmt.c b/mk/libfmt/dofmt.c @@ -0,0 +1,617 
@@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +/* Copyright (c) 2004 Google Inc.; see LICENSE */ + +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* format the output into f->to and return the number of characters fmted */ +int +dofmt(Fmt *f, char *fmt) +{ + Rune rune, *rt, *rs; + int r; + char *t, *s; + int n, nfmt; + + nfmt = f->nfmt; + for(;;){ + if(f->runes){ + rt = (Rune*)f->to; + rs = (Rune*)f->stop; + while((r = *(uchar*)fmt) && r != '%'){ + if(r < Runeself) + fmt++; + else{ + fmt += chartorune(&rune, fmt); + r = rune; + } + FMTRCHAR(f, rt, rs, r); + } + fmt++; + f->nfmt += rt - (Rune *)f->to; + f->to = rt; + if(!r) + return f->nfmt - nfmt; + f->stop = rs; + }else{ + t = (char*)f->to; + s = (char*)f->stop; + while((r = *(uchar*)fmt) && r != '%'){ + if(r < Runeself){ + FMTCHAR(f, t, s, r); + fmt++; + }else{ + n = chartorune(&rune, fmt); + if(t + n > s){ + t = (char*)__fmtflush(f, t, n); + if(t != nil) + s = (char*)f->stop; + else + return -1; + } + while(n--) + *t++ = *fmt++; + } + } + fmt++; + f->nfmt += t - (char *)f->to; + f->to = t; + if(!r) + return f->nfmt - nfmt; + f->stop = s; + } + + fmt = (char*)__fmtdispatch(f, fmt, 0); + if(fmt == nil) + return -1; + } +} + +void * +__fmtflush(Fmt *f, void *t, int len) +{ + if(f->runes) + f->nfmt += (Rune*)t - (Rune*)f->to; + else + f->nfmt += (char*)t - (char *)f->to; + f->to = t; + if(f->flush == 0 || (*f->flush)(f) == 0 || (char*)f->to + len > (char*)f->stop){ + f->stop = f->to; + return nil; + } + return f->to; +} + +/* + * put a formatted block of memory sz bytes long of n runes into the output buffer, + * left/right justified in a field of at least f->width characters (if FmtWidth is set) + */ +int +__fmtpad(Fmt *f, int n) +{ + char *t, *s; + int i; + + t = (char*)f->to; + s = (char*)f->stop; + for(i = 0; i < n; i++) + FMTCHAR(f, t, s, ' '); + f->nfmt += t - (char *)f->to; + f->to = t; + return 0; +} + +int +__rfmtpad(Fmt *f, int 
n) +{ + Rune *t, *s; + int i; + + t = (Rune*)f->to; + s = (Rune*)f->stop; + for(i = 0; i < n; i++) + FMTRCHAR(f, t, s, ' '); + f->nfmt += t - (Rune *)f->to; + f->to = t; + return 0; +} + +int +__fmtcpy(Fmt *f, const void *vm, int n, int sz) +{ + Rune *rt, *rs, r; + char *t, *s, *m, *me; + ulong fl; + int nc, w; + + m = (char*)vm; + me = m + sz; + fl = f->flags; + w = 0; + if(fl & FmtWidth) + w = f->width; + if((fl & FmtPrec) && n > f->prec) + n = f->prec; + if(f->runes){ + if(!(fl & FmtLeft) && __rfmtpad(f, w - n) < 0) + return -1; + rt = (Rune*)f->to; + rs = (Rune*)f->stop; + for(nc = n; nc > 0; nc--){ + r = *(uchar*)m; + if(r < Runeself) + m++; + else if((me - m) >= UTFmax || fullrune(m, me-m)) + m += chartorune(&r, m); + else + break; + FMTRCHAR(f, rt, rs, r); + } + f->nfmt += rt - (Rune *)f->to; + f->to = rt; + if(fl & FmtLeft && __rfmtpad(f, w - n) < 0) + return -1; + }else{ + if(!(fl & FmtLeft) && __fmtpad(f, w - n) < 0) + return -1; + t = (char*)f->to; + s = (char*)f->stop; + for(nc = n; nc > 0; nc--){ + r = *(uchar*)m; + if(r < Runeself) + m++; + else if((me - m) >= UTFmax || fullrune(m, me-m)) + m += chartorune(&r, m); + else + break; + FMTRUNE(f, t, s, r); + } + f->nfmt += t - (char *)f->to; + f->to = t; + if(fl & FmtLeft && __fmtpad(f, w - n) < 0) + return -1; + } + return 0; +} + +int +__fmtrcpy(Fmt *f, const void *vm, int n) +{ + Rune r, *m, *me, *rt, *rs; + char *t, *s; + ulong fl; + int w; + + m = (Rune*)vm; + fl = f->flags; + w = 0; + if(fl & FmtWidth) + w = f->width; + if((fl & FmtPrec) && n > f->prec) + n = f->prec; + if(f->runes){ + if(!(fl & FmtLeft) && __rfmtpad(f, w - n) < 0) + return -1; + rt = (Rune*)f->to; + rs = (Rune*)f->stop; + for(me = m + n; m < me; m++) + FMTRCHAR(f, rt, rs, *m); + f->nfmt += rt - (Rune *)f->to; + f->to = rt; + if(fl & FmtLeft && __rfmtpad(f, w - n) < 0) + return -1; + }else{ + if(!(fl & FmtLeft) && __fmtpad(f, w - n) < 0) + return -1; + t = (char*)f->to; + s = (char*)f->stop; + for(me = m + n; m < me; m++){ + r = *m; 
+ FMTRUNE(f, t, s, r); + } + f->nfmt += t - (char *)f->to; + f->to = t; + if(fl & FmtLeft && __fmtpad(f, w - n) < 0) + return -1; + } + return 0; +} + +/* fmt out one character */ +int +__charfmt(Fmt *f) +{ + char x[1]; + + x[0] = va_arg(f->args, int); + f->prec = 1; + return __fmtcpy(f, (const char*)x, 1, 1); +} + +/* fmt out one rune */ +int +__runefmt(Fmt *f) +{ + Rune x[1]; + + x[0] = va_arg(f->args, int); + return __fmtrcpy(f, (const void*)x, 1); +} + +/* public helper routine: fmt out a null terminated string already in hand */ +int +fmtstrcpy(Fmt *f, char *s) +{ + int i, j; + + if(!s) + return __fmtcpy(f, "<nil>", 5, 5); + /* if precision is specified, make sure we don't wander off the end */ + if(f->flags & FmtPrec){ +#ifdef PLAN9PORT + Rune r; + i = 0; + for(j=0; j<f->prec && s[i]; j++) + i += chartorune(&r, s+i); +#else + /* ANSI requires precision in bytes, not Runes */ + for(i=0; i<f->prec; i++) + if(s[i] == 0) + break; + j = utfnlen(s, i); /* won't print partial at end */ +#endif + return __fmtcpy(f, s, j, i); + } + return __fmtcpy(f, s, utflen(s), strlen(s)); +} + +/* fmt out a null terminated utf string */ +int +__strfmt(Fmt *f) +{ + char *s; + + s = va_arg(f->args, char *); + return fmtstrcpy(f, s); +} + +/* public helper routine: fmt out a null terminated rune string already in hand */ +int +fmtrunestrcpy(Fmt *f, Rune *s) +{ + Rune *e; + int n, p; + + if(!s) + return __fmtcpy(f, "<nil>", 5, 5); + /* if precision is specified, make sure we don't wander off the end */ + if(f->flags & FmtPrec){ + p = f->prec; + for(n = 0; n < p; n++) + if(s[n] == 0) + break; + }else{ + for(e = s; *e; e++) + ; + n = e - s; + } + return __fmtrcpy(f, s, n); +} + +/* fmt out a null terminated rune string */ +int +__runesfmt(Fmt *f) +{ + Rune *s; + + s = va_arg(f->args, Rune *); + return fmtrunestrcpy(f, s); +} + +/* fmt a % */ +int +__percentfmt(Fmt *f) +{ + Rune x[1]; + + x[0] = f->r; + f->prec = 1; + return __fmtrcpy(f, (const void*)x, 1); +} + +/* fmt an integer */ 
+int +__ifmt(Fmt *f) +{ + char buf[140], *p, *conv; + /* 140: for 64 bits of binary + 3-byte sep every 4 digits */ + uvlong vu; + ulong u; + int neg, base, i, n, fl, w, isv; + int ndig, len, excess, bytelen; + char *grouping; + char *thousands; + + neg = 0; + fl = f->flags; + isv = 0; + vu = 0; + u = 0; +#ifndef PLAN9PORT + /* + * Unsigned verbs for ANSI C + */ + switch(f->r){ + case 'o': + case 'p': + case 'u': + case 'x': + case 'X': + fl |= FmtUnsigned; + fl &= ~(FmtSign|FmtSpace); + break; + } +#endif + if(f->r == 'p'){ + u = (ulong)va_arg(f->args, void*); + f->r = 'x'; + fl |= FmtUnsigned; + }else if(fl & FmtVLong){ + isv = 1; + if(fl & FmtUnsigned) + vu = va_arg(f->args, uvlong); + else + vu = va_arg(f->args, vlong); + }else if(fl & FmtLong){ + if(fl & FmtUnsigned) + u = va_arg(f->args, ulong); + else + u = va_arg(f->args, long); + }else if(fl & FmtByte){ + if(fl & FmtUnsigned) + u = (uchar)va_arg(f->args, int); + else + u = (char)va_arg(f->args, int); + }else if(fl & FmtShort){ + if(fl & FmtUnsigned) + u = (ushort)va_arg(f->args, int); + else + u = (short)va_arg(f->args, int); + }else{ + if(fl & FmtUnsigned) + u = va_arg(f->args, uint); + else + u = va_arg(f->args, int); + } + conv = "0123456789abcdef"; + grouping = "\4"; /* for hex, octal etc. 
(undefined by spec but nice) */ + thousands = f->thousands; + switch(f->r){ + case 'd': + case 'i': + case 'u': + base = 10; + grouping = f->grouping; + break; + case 'X': + conv = "0123456789ABCDEF"; + /* fall through */ + case 'x': + base = 16; + thousands = ":"; + break; + case 'b': + base = 2; + thousands = ":"; + break; + case 'o': + base = 8; + break; + default: + return -1; + } + if(!(fl & FmtUnsigned)){ + if(isv && (vlong)vu < 0){ + vu = -(vlong)vu; + neg = 1; + }else if(!isv && (long)u < 0){ + u = -(long)u; + neg = 1; + } + } + p = buf + sizeof buf - 1; + n = 0; /* in runes */ + excess = 0; /* number of bytes > number runes */ + ndig = 0; + len = utflen(thousands); + bytelen = strlen(thousands); + if(isv){ + while(vu){ + i = vu % base; + vu /= base; + if((fl & FmtComma) && n % 4 == 3){ + *p-- = ','; + n++; + } + if((fl & FmtApost) && __needsep(&ndig, &grouping)){ + n += len; + excess += bytelen - len; + p -= bytelen; + memmove(p+1, thousands, bytelen); + } + *p-- = conv[i]; + n++; + } + }else{ + while(u){ + i = u % base; + u /= base; + if((fl & FmtComma) && n % 4 == 3){ + *p-- = ','; + n++; + } + if((fl & FmtApost) && __needsep(&ndig, &grouping)){ + n += len; + excess += bytelen - len; + p -= bytelen; + memmove(p+1, thousands, bytelen); + } + *p-- = conv[i]; + n++; + } + } + if(n == 0){ + /* + * "The result of converting a zero value with + * a precision of zero is no characters." - ANSI + * + * "For o conversion, # increases the precision, if and only if + * necessary, to force the first digit of the result to be a zero + * (if the value and precision are both 0, a single 0 is printed)." - ANSI + */ + if(!(fl & FmtPrec) || f->prec != 0 || (f->r == 'o' && (fl & FmtSharp))){ + *p-- = '0'; + n = 1; + if(fl & FmtApost) + __needsep(&ndig, &grouping); + } + + /* + * Zero values don't get 0x. 
+ */ + if(f->r == 'x' || f->r == 'X') + fl &= ~FmtSharp; + } + for(w = f->prec; n < w && p > buf+3; n++){ + if((fl & FmtApost) && __needsep(&ndig, &grouping)){ + n += len; + excess += bytelen - len; + p -= bytelen; + memmove(p+1, thousands, bytelen); + } + *p-- = '0'; + } + if(neg || (fl & (FmtSign|FmtSpace))) + n++; + if(fl & FmtSharp){ + if(base == 16) + n += 2; + else if(base == 8){ + if(p[1] == '0') + fl &= ~FmtSharp; + else + n++; + } + } + if((fl & FmtZero) && !(fl & (FmtLeft|FmtPrec))){ + w = 0; + if(fl & FmtWidth) + w = f->width; + for(; n < w && p > buf+3; n++){ + if((fl & FmtApost) && __needsep(&ndig, &grouping)){ + n += len; + excess += bytelen - len; + p -= bytelen; + memmove(p+1, thousands, bytelen); + } + *p-- = '0'; + } + f->flags &= ~FmtWidth; + } + if(fl & FmtSharp){ + if(base == 16) + *p-- = f->r; + if(base == 16 || base == 8) + *p-- = '0'; + } + if(neg) + *p-- = '-'; + else if(fl & FmtSign) + *p-- = '+'; + else if(fl & FmtSpace) + *p-- = ' '; + f->flags &= ~FmtPrec; + return __fmtcpy(f, p + 1, n, n + excess); +} + +int +__countfmt(Fmt *f) +{ + void *p; + ulong fl; + + fl = f->flags; + p = va_arg(f->args, void*); + if(fl & FmtVLong){ + *(vlong*)p = f->nfmt; + }else if(fl & FmtLong){ + *(long*)p = f->nfmt; + }else if(fl & FmtByte){ + *(char*)p = f->nfmt; + }else if(fl & FmtShort){ + *(short*)p = f->nfmt; + }else{ + *(int*)p = f->nfmt; + } + return 0; +} + +int +__flagfmt(Fmt *f) +{ + switch(f->r){ + case ',': + f->flags |= FmtComma; + break; + case '-': + f->flags |= FmtLeft; + break; + case '+': + f->flags |= FmtSign; + break; + case '#': + f->flags |= FmtSharp; + break; + case '\'': + f->flags |= FmtApost; + break; + case ' ': + f->flags |= FmtSpace; + break; + case 'u': + f->flags |= FmtUnsigned; + break; + case 'h': + if(f->flags & FmtShort) + f->flags |= FmtByte; + f->flags |= FmtShort; + break; + case 'L': + f->flags |= FmtLDouble; + break; + case 'l': + if(f->flags & FmtLong) + f->flags |= FmtVLong; + f->flags |= FmtLong; + break; + } + 
return 1; +} + +/* default error format */ +int +__badfmt(Fmt *f) +{ + char x[2+UTFmax]; + int n; + + x[0] = '%'; + n = 1 + runetochar(x+1, &f->r); + x[n++] = '%'; + f->prec = n; + __fmtcpy(f, (const void*)x, n, n); + return 0; +} diff --git a/mk/libfmt/dorfmt.c b/mk/libfmt/dorfmt.c @@ -0,0 +1,50 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* format the output into f->to and return the number of characters fmted */ + +/* BUG: THIS FILE IS NOT UPDATED TO THE NEW SPEC */ +int +dorfmt(Fmt *f, const Rune *fmt) +{ + Rune *rt, *rs; + int r; + char *t, *s; + int nfmt; + + nfmt = f->nfmt; + for(;;){ + if(f->runes){ + rt = (Rune*)f->to; + rs = (Rune*)f->stop; + while((r = *fmt++) && r != '%'){ + FMTRCHAR(f, rt, rs, r); + } + f->nfmt += rt - (Rune *)f->to; + f->to = rt; + if(!r) + return f->nfmt - nfmt; + f->stop = rs; + }else{ + t = (char*)f->to; + s = (char*)f->stop; + while((r = *fmt++) && r != '%'){ + FMTRUNE(f, t, f->stop, r); + } + f->nfmt += t - (char *)f->to; + f->to = t; + if(!r) + return f->nfmt - nfmt; + f->stop = s; + } + + fmt = (Rune*)__fmtdispatch(f, (Rune*)fmt, 1); + if(fmt == nil) + return -1; + } + return 0; /* not reached */ +} diff --git a/mk/libfmt/errfmt.c b/mk/libfmt/errfmt.c @@ -0,0 +1,16 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <errno.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +__errfmt(Fmt *f) +{ + char *s; + + s = strerror(errno); + return fmtstrcpy(f, s); +} diff --git a/mk/libfmt/fltfmt.c b/mk/libfmt/fltfmt.c @@ -0,0 +1,668 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdio.h> +#include <math.h> +#include <float.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <stdarg.h> +#include <fmt.h> +#include <assert.h> +#include "plan9.h" +#include "fmt.h" +#include 
"fmtdef.h" +#include "nan.h" + +enum +{ + FDIGIT = 30, + FDEFLT = 6, + NSIGNIF = 17 +}; + +/* + * first few powers of 10, enough for about 1/2 of the + * total space for doubles. + */ +static double pows10[] = +{ + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, + 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, + 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, + 1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, + 1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, + 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, + 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79, + 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, 1e88, 1e89, + 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, 1e97, 1e98, 1e99, + 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109, + 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119, + 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, + 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, + 1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, + 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, +}; +#define npows10 ((int)(sizeof(pows10)/sizeof(pows10[0]))) +#define pow10(x) fmtpow10(x) + +static double +pow10(int n) +{ + double d; + int neg; + + neg = 0; + if(n < 0){ + neg = 1; + n = -n; + } + + if(n < npows10) + d = pows10[n]; + else{ + d = pows10[npows10-1]; + for(;;){ + n -= npows10 - 1; + if(n < npows10){ + d *= pows10[n]; + break; + } + d *= pows10[npows10 - 1]; + } + } + if(neg) + return 1./d; + return d; +} + +/* + * add 1 to the decimal integer string a of length n. + * if 99999 overflows into 10000, return 1 to tell caller + * to move the virtual decimal point. 
+ */ +static int +xadd1(char *a, int n) +{ + char *b; + int c; + + if(n < 0 || n > NSIGNIF) + return 0; + for(b = a+n-1; b >= a; b--) { + c = *b + 1; + if(c <= '9') { + *b = c; + return 0; + } + *b = '0'; + } + /* + * need to overflow adding digit. + * shift number down and insert 1 at beginning. + * decimal is known to be 0s or we wouldn't + * have gotten this far. (e.g., 99999+1 => 00000) + */ + a[0] = '1'; + return 1; +} + +/* + * subtract 1 from the decimal integer string a. + * if 10000 underflows into 09999, make it 99999 + * and return 1 to tell caller to move the virtual + * decimal point. this way, xsub1 is inverse of xadd1. + */ +static int +xsub1(char *a, int n) +{ + char *b; + int c; + + if(n < 0 || n > NSIGNIF) + return 0; + for(b = a+n-1; b >= a; b--) { + c = *b - 1; + if(c >= '0') { + if(c == '0' && b == a) { + /* + * just zeroed the top digit; shift everyone up. + * decimal is known to be 9s or we wouldn't + * have gotten this far. (e.g., 10000-1 => 09999) + */ + *b = '9'; + return 1; + } + *b = c; + return 0; + } + *b = '9'; + } + /* + * can't get here. the number a is always normalized + * so that it has a nonzero first digit. + */ + abort(); +} + +/* + * format exponent like sprintf(p, "e%+03d", e), with 'E' in place of 'e' if ucase + */ +static void +xfmtexp(char *p, int e, int ucase) +{ + char se[9]; + int i; + + *p++ = ucase ? 'E' : 'e'; + if(e < 0) { + *p++ = '-'; + e = -e; + } else + *p++ = '+'; + i = 0; + while(e) { + se[i++] = e % 10 + '0'; + e /= 10; + } + while(i < 2) + se[i++] = '0'; + while(i > 0) + *p++ = se[--i]; + *p++ = '\0'; +} + +/* + * compute decimal integer m, exp such that: + * f = m*10^exp + * m is as short as possible without losing exactness + * assumes special cases (NaN, +Inf, -Inf) have been handled. + */ +static void +xdtoa(double f, char *s, int *exp, int *neg, int *ns) +{ + int c, d, e2, e, ee, i, ndigit, oerrno; + char tmp[NSIGNIF+10]; + double g; + + oerrno = errno; /* in case strtod smashes errno */ + + /* + * make f non-negative. 
+ */ + *neg = 0; + if(f < 0) { + f = -f; + *neg = 1; + } + + /* + * must handle zero specially. + */ + if(f == 0){ + *exp = 0; + s[0] = '0'; + s[1] = '\0'; + *ns = 1; + return; + } + + /* + * find g,e such that f = g*10^e. + * guess 10-exponent using 2-exponent, then fine tune. + */ + frexp(f, &e2); + e = (int)(e2 * .301029995664); + g = f * pow10(-e); + while(g < 1) { + e--; + g = f * pow10(-e); + } + while(g >= 10) { + e++; + g = f * pow10(-e); + } + + /* + * convert NSIGNIF digits as a first approximation. + */ + for(i=0; i<NSIGNIF; i++) { + d = (int)g; + s[i] = d+'0'; + g = (g-d) * 10; + } + s[i] = 0; + + /* + * adjust e because s is 314159... not 3.14159... + */ + e -= NSIGNIF-1; + xfmtexp(s+NSIGNIF, e, 0); + + /* + * adjust conversion until strtod(s) == f exactly. + */ + for(i=0; i<10; i++) { + g = fmtstrtod(s, nil); + if(f > g) { + if(xadd1(s, NSIGNIF)) { + /* gained a digit */ + e--; + xfmtexp(s+NSIGNIF, e, 0); + } + continue; + } + if(f < g) { + if(xsub1(s, NSIGNIF)) { + /* lost a digit */ + e++; + xfmtexp(s+NSIGNIF, e, 0); + } + continue; + } + break; + } + + /* + * play with the decimal to try to simplify. + */ + + /* + * bump last few digits up to 9 if we can + */ + for(i=NSIGNIF-1; i>=NSIGNIF-3; i--) { + c = s[i]; + if(c != '9') { + s[i] = '9'; + g = fmtstrtod(s, nil); + if(g != f) { + s[i] = c; + break; + } + } + } + + /* + * add 1 in hopes of turning 9s to 0s + */ + if(s[NSIGNIF-1] == '9') { + strcpy(tmp, s); + ee = e; + if(xadd1(tmp, NSIGNIF)) { + ee--; + xfmtexp(tmp+NSIGNIF, ee, 0); + } + g = fmtstrtod(tmp, nil); + if(g == f) { + strcpy(s, tmp); + e = ee; + } + } + + /* + * bump last few digits down to 0 as we can. + */ + for(i=NSIGNIF-1; i>=NSIGNIF-3; i--) { + c = s[i]; + if(c != '0') { + s[i] = '0'; + g = fmtstrtod(s, nil); + if(g != f) { + s[i] = c; + break; + } + } + } + + /* + * remove trailing zeros. 
+ */ + ndigit = NSIGNIF; + while(ndigit > 1 && s[ndigit-1] == '0'){ + e++; + --ndigit; + } + s[ndigit] = 0; + *exp = e; + *ns = ndigit; + errno = oerrno; +} + +#ifdef PLAN9PORT +static char *special[] = { "NaN", "NaN", "+Inf", "+Inf", "-Inf", "-Inf" }; +#else +static char *special[] = { "nan", "NAN", "inf", "INF", "-inf", "-INF" }; +#endif + +int +__efgfmt(Fmt *fmt) +{ + char buf[NSIGNIF+10], *dot, *digits, *p, *s, suf[10], *t; + double f; + int c, chr, dotwid, e, exp, fl, ndigits, neg, newndigits; + int pad, point, prec, realchr, sign, sufwid, ucase, wid, z1, z2; + Rune r, *rs, *rt; + + if(fmt->flags&FmtLong) /* NOTE(review): __flagfmt sets FmtLong for 'l' and FmtLDouble for 'L'; this tests FmtLong, so %lf fetches a long double and %Lf a double -- confirm intended */ + f = va_arg(fmt->args, long double); + else + f = va_arg(fmt->args, double); + + /* + * extract formatting flags + */ + fl = fmt->flags; + fmt->flags = 0; + prec = FDEFLT; + if(fl & FmtPrec) + prec = fmt->prec; + chr = fmt->r; + ucase = 0; + switch(chr) { + case 'A': + case 'E': + case 'F': + case 'G': + chr += 'a'-'A'; + ucase = 1; + break; + } + + /* + * pick off special numbers. + */ + if(__isNaN(f)) { + s = special[0+ucase]; + special: + fmt->flags = fl & (FmtWidth|FmtLeft); + return __fmtcpy(fmt, s, strlen(s), strlen(s)); + } + if(__isInf(f, 1)) { + s = special[2+ucase]; + goto special; + } + if(__isInf(f, -1)) { + s = special[4+ucase]; + goto special; + } + + /* + * get exact representation. + */ + digits = buf; + xdtoa(f, digits, &exp, &neg, &ndigits); + + /* + * get locale's decimal point. + */ + dot = fmt->decimal; + if(dot == nil) + dot = "."; + dotwid = utflen(dot); + + /* + * now the formatting fun begins. + * compute parameters for actual fmt: + * + * pad: number of spaces to insert before/after field. + * z1: number of zeros to insert before digits + * z2: number of zeros to insert after digits + * point: number of digits to print before decimal point + * ndigits: number of digits to use from digits[] + * suf: trailing suffix, like "e-5" + */ + realchr = chr; + switch(chr){ + case 'g': + /* + * convert to at most prec significant digits. 
(prec=0 means 1) + */ + if(prec == 0) + prec = 1; + if(ndigits > prec) { + if(digits[prec] >= '5' && xadd1(digits, prec)) + exp++; + exp += ndigits-prec; + ndigits = prec; + } + + /* + * extra rules for %g (implemented below): + * trailing zeros removed after decimal unless FmtSharp. + * decimal point only if digit follows. + */ + + /* fall through to %e */ + default: + case 'e': + /* + * one significant digit before decimal, no leading zeros. + */ + point = 1; + z1 = 0; + + /* + * decimal point is after ndigits digits right now. + * slide to be after first. + */ + e = exp + (ndigits-1); + + /* + * if this is %g, check exponent and convert prec + */ + if(realchr == 'g') { + if(-4 <= e && e < prec) + goto casef; + prec--; /* one digit before decimal; rest after */ + } + + /* + * compute trailing zero padding or truncate digits. + */ + if(1+prec >= ndigits) + z2 = 1+prec - ndigits; + else { + /* + * truncate digits + */ + assert(realchr != 'g'); + newndigits = 1+prec; + if(digits[newndigits] >= '5' && xadd1(digits, newndigits)) { + /* + * had 999e4, now have 100e5 + */ + e++; + } + ndigits = newndigits; + z2 = 0; + } + xfmtexp(suf, e, ucase); + sufwid = strlen(suf); + break; + + casef: + case 'f': + /* + * determine where digits go with respect to decimal point + */ + if(ndigits+exp > 0) { + point = ndigits+exp; + z1 = 0; + } else { + point = 1; + z1 = 1 + -(ndigits+exp); + } + + /* + * %g specifies prec = number of significant digits + * convert to number of digits after decimal point + */ + if(realchr == 'g') + prec += z1 - point; + + /* + * compute trailing zero padding or truncate digits. 
+ */ + if(point+prec >= z1+ndigits) + z2 = point+prec - (z1+ndigits); + else { + /* + * truncate digits + */ + assert(realchr != 'g'); + newndigits = point+prec - z1; + if(newndigits < 0) { + z1 += newndigits; + newndigits = 0; + } else if(newndigits == 0) { + /* perhaps round up */ + if(digits[0] >= '5'){ + digits[0] = '1'; + newndigits = 1; + goto newdigit; + } + } else if(digits[newndigits] >= '5' && xadd1(digits, newndigits)) { + /* + * digits was 999, is now 100; make it 1000 + */ + digits[newndigits++] = '0'; + newdigit: + /* + * account for new digit + */ + if(z1) /* 0.099 => 0.100 or 0.99 => 1.00*/ + z1--; + else /* 9.99 => 10.00 */ + point++; + } + z2 = 0; + ndigits = newndigits; + } + sufwid = 0; + break; + } + + /* + * if %g is given without FmtSharp, remove trailing zeros. + * must do after truncation, so that e.g. print %.3g 1.001 + * produces 1, not 1.00. sorry, but them's the rules. + */ + if(realchr == 'g' && !(fl & FmtSharp)) { + if(z1+ndigits+z2 >= point) { + if(z1+ndigits < point) + z2 = point - (z1+ndigits); + else{ + z2 = 0; + while(z1+ndigits > point && digits[ndigits-1] == '0') + ndigits--; + } + } + } + + /* + * compute width of all digits and decimal point and suffix if any + */ + wid = z1+ndigits+z2; + if(wid > point) + wid += dotwid; + else if(wid == point){ + if(fl & FmtSharp) + wid += dotwid; + else + point++; /* do not print any decimal point */ + } + wid += sufwid; + + /* + * determine sign + */ + sign = 0; + if(neg) + sign = '-'; + else if(fl & FmtSign) + sign = '+'; + else if(fl & FmtSpace) + sign = ' '; + if(sign) + wid++; + + /* + * compute padding + */ + pad = 0; + if((fl & FmtWidth) && fmt->width > wid) + pad = fmt->width - wid; + if(pad && !(fl & FmtLeft) && (fl & FmtZero)){ + z1 += pad; + point += pad; + pad = 0; + } + + /* + * format the actual field. too bad about doing this twice. 
+ */ + if(fmt->runes){ + if(pad && !(fl & FmtLeft) && __rfmtpad(fmt, pad) < 0) + return -1; + rt = (Rune*)fmt->to; + rs = (Rune*)fmt->stop; + if(sign) + FMTRCHAR(fmt, rt, rs, sign); + while(z1>0 || ndigits>0 || z2>0) { + if(z1 > 0){ + z1--; + c = '0'; + }else if(ndigits > 0){ + ndigits--; + c = *digits++; + }else{ + z2--; + c = '0'; + } + FMTRCHAR(fmt, rt, rs, c); + if(--point == 0) { + for(p = dot; *p; ){ + p += chartorune(&r, p); + FMTRCHAR(fmt, rt, rs, r); + } + } + } + fmt->nfmt += rt - (Rune*)fmt->to; + fmt->to = rt; + if(sufwid && __fmtcpy(fmt, suf, sufwid, sufwid) < 0) + return -1; + if(pad && (fl & FmtLeft) && __rfmtpad(fmt, pad) < 0) + return -1; + }else{ + if(pad && !(fl & FmtLeft) && __fmtpad(fmt, pad) < 0) + return -1; + t = (char*)fmt->to; + s = (char*)fmt->stop; + if(sign) + FMTCHAR(fmt, t, s, sign); + while(z1>0 || ndigits>0 || z2>0) { + if(z1 > 0){ + z1--; + c = '0'; + }else if(ndigits > 0){ + ndigits--; + c = *digits++; + }else{ + z2--; + c = '0'; + } + FMTCHAR(fmt, t, s, c); + if(--point == 0) + for(p=dot; *p; p++) + FMTCHAR(fmt, t, s, *p); + } + fmt->nfmt += t - (char*)fmt->to; + fmt->to = t; + if(sufwid && __fmtcpy(fmt, suf, sufwid, sufwid) < 0) + return -1; + if(pad && (fl & FmtLeft) && __fmtpad(fmt, pad) < 0) + return -1; + } + return 0; +} + diff --git a/mk/libfmt/fmt.c b/mk/libfmt/fmt.c @@ -0,0 +1,220 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +enum +{ + Maxfmt = 64 +}; + +typedef struct Convfmt Convfmt; +struct Convfmt +{ + int c; + volatile Fmts fmt; /* for spin lock in fmtfmt; avoids race due to write order */ +}; + +static struct +{ + /* lock by calling __fmtlock, __fmtunlock */ + int nfmt; + Convfmt fmt[Maxfmt]; +} fmtalloc; + +static Convfmt knownfmt[] = { + ' ', __flagfmt, + '#', __flagfmt, + '%', __percentfmt, + '\'', __flagfmt, + '+', __flagfmt, + ',', __flagfmt, + '-', __flagfmt, + 'C', __runefmt, /* Plan 9 
addition */ + 'E', __efgfmt, +#ifndef PLAN9PORT + 'F', __efgfmt, /* ANSI only */ +#endif + 'G', __efgfmt, +#ifndef PLAN9PORT + 'L', __flagfmt, /* ANSI only */ +#endif + 'S', __runesfmt, /* Plan 9 addition */ + 'X', __ifmt, + 'b', __ifmt, /* Plan 9 addition */ + 'c', __charfmt, + 'd', __ifmt, + 'e', __efgfmt, + 'f', __efgfmt, + 'g', __efgfmt, + 'h', __flagfmt, +#ifndef PLAN9PORT + 'i', __ifmt, /* ANSI only */ +#endif + 'l', __flagfmt, + 'n', __countfmt, + 'o', __ifmt, + 'p', __ifmt, + 'r', __errfmt, + 's', __strfmt, +#ifdef PLAN9PORT + 'u', __flagfmt, +#else + 'u', __ifmt, +#endif + 'x', __ifmt, + 0, nil, +}; + + +int (*fmtdoquote)(int); + +/* + * __fmtlock() must be set + */ +static int +__fmtinstall(int c, Fmts f) +{ + Convfmt *p, *ep; + + if(c<=0 || c>=65536) + return -1; + if(!f) + f = __badfmt; + + ep = &fmtalloc.fmt[fmtalloc.nfmt]; + for(p=fmtalloc.fmt; p<ep; p++) + if(p->c == c) + break; + + if(p == &fmtalloc.fmt[Maxfmt]) + return -1; + + p->fmt = f; + if(p == ep){ /* installing a new format character */ + fmtalloc.nfmt++; + p->c = c; + } + + return 0; +} + +int +fmtinstall(int c, int (*f)(Fmt*)) +{ + int ret; + + __fmtlock(); + ret = __fmtinstall(c, f); + __fmtunlock(); + return ret; +} + +static Fmts +fmtfmt(int c) +{ + Convfmt *p, *ep; + + ep = &fmtalloc.fmt[fmtalloc.nfmt]; + for(p=fmtalloc.fmt; p<ep; p++) + if(p->c == c){ + while(p->fmt == nil) /* loop until value is updated */ + ; + return p->fmt; + } + + /* is this a predefined format char? 
*/ + __fmtlock(); + for(p=knownfmt; p->c; p++) + if(p->c == c){ + __fmtinstall(p->c, p->fmt); + __fmtunlock(); + return p->fmt; + } + __fmtunlock(); + + return __badfmt; +} + +void* +__fmtdispatch(Fmt *f, void *fmt, int isrunes) +{ + Rune rune, r; + int i, n; + + f->flags = 0; + f->width = f->prec = 0; + + for(;;){ + if(isrunes){ + r = *(Rune*)fmt; + fmt = (Rune*)fmt + 1; + }else{ + fmt = (char*)fmt + chartorune(&rune, (char*)fmt); + r = rune; + } + f->r = r; + switch(r){ + case '\0': + return nil; + case '.': + f->flags |= FmtWidth|FmtPrec; + continue; + case '0': + if(!(f->flags & FmtWidth)){ + f->flags |= FmtZero; + continue; + } + /* fall through */ + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + i = 0; + while(r >= '0' && r <= '9'){ + i = i * 10 + r - '0'; + if(isrunes){ + r = *(Rune*)fmt; + fmt = (Rune*)fmt + 1; + }else{ + r = *(char*)fmt; + fmt = (char*)fmt + 1; + } + } + if(isrunes) + fmt = (Rune*)fmt - 1; + else + fmt = (char*)fmt - 1; + numflag: + if(f->flags & FmtWidth){ + f->flags |= FmtPrec; + f->prec = i; + }else{ + f->flags |= FmtWidth; + f->width = i; + } + continue; + case '*': + i = va_arg(f->args, int); + if(i < 0){ + /* + * negative precision => + * ignore the precision. + */ + if(f->flags & FmtPrec){ + f->flags &= ~FmtPrec; + f->prec = 0; + continue; + } + i = -i; + f->flags |= FmtLeft; + } + goto numflag; + } + n = (*fmtfmt(r))(f); + if(n < 0) + return nil; + if(n == 0) + return fmt; + } +} diff --git a/mk/libfmt/fmt.h b/mk/libfmt/fmt.h @@ -0,0 +1,116 @@ +#ifndef _FMT_H_ +#define _FMT_H_ 1 +#if defined(__cplusplus) +extern "C" { +#endif +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ + +#include <stdarg.h> +#include <utf.h> + +typedef struct Fmt Fmt; +struct Fmt{ + unsigned char runes; /* output buffer is runes or chars? */ + void *start; /* of buffer */ + void *to; /* current place in the buffer */ + void *stop; /* end of the buffer; overwritten if flush fails */ + int (*flush)(Fmt *); /* called when to == stop */ + void *farg; /* to make flush a closure */ + int nfmt; /* num chars formatted so far */ + va_list args; /* args passed to dofmt */ + Rune r; /* % format Rune */ + int width; + int prec; + unsigned long flags; + char *decimal; /* decimal point; cannot be "" */ + + /* For %'d */ + char *thousands; /* separator for thousands */ + + /* + * Each char is an integer indicating #digits before next separator. 
Values: + * \xFF: no more grouping (or \x7F; defined to be CHAR_MAX in POSIX) + * \x00: repeat previous indefinitely + * \x**: count that many + */ + char *grouping; /* descriptor of separator placement */ +}; + +enum{ + FmtWidth = 1, + FmtLeft = FmtWidth << 1, + FmtPrec = FmtLeft << 1, + FmtSharp = FmtPrec << 1, + FmtSpace = FmtSharp << 1, + FmtSign = FmtSpace << 1, + FmtApost = FmtSign << 1, + FmtZero = FmtApost << 1, + FmtUnsigned = FmtZero << 1, + FmtShort = FmtUnsigned << 1, + FmtLong = FmtShort << 1, + FmtVLong = FmtLong << 1, + FmtComma = FmtVLong << 1, + FmtByte = FmtComma << 1, + FmtLDouble = FmtByte << 1, + + FmtFlag = FmtLDouble << 1 +}; + +extern int (*fmtdoquote)(int); + +/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/fmt/?*.c | grep -v static |grep -v __ */ +int dofmt(Fmt *f, char *fmt); +int dorfmt(Fmt *f, const Rune *fmt); +double fmtcharstod(int(*f)(void*), void *vp); +int fmtfdflush(Fmt *f); +int fmtfdinit(Fmt *f, int fd, char *buf, int size); +int fmtinstall(int c, int (*f)(Fmt*)); +int fmtnullinit(Fmt*); +void fmtlocaleinit(Fmt*, char*, char*, char*); +int fmtprint(Fmt *f, char *fmt, ...); +int fmtrune(Fmt *f, int r); +int fmtrunestrcpy(Fmt *f, Rune *s); +int fmtstrcpy(Fmt *f, char *s); +char* fmtstrflush(Fmt *f); +int fmtstrinit(Fmt *f); +double fmtstrtod(const char *as, char **aas); +int fmtvprint(Fmt *f, char *fmt, va_list args); +int fprint(int fd, char *fmt, ...); +int print(char *fmt, ...); +void quotefmtinstall(void); +int quoterunestrfmt(Fmt *f); +int quotestrfmt(Fmt *f); +Rune* runefmtstrflush(Fmt *f); +int runefmtstrinit(Fmt *f); +Rune* runeseprint(Rune *buf, Rune *e, char *fmt, ...); +Rune* runesmprint(char *fmt, ...); +int runesnprint(Rune *buf, int len, char *fmt, ...); +int runesprint(Rune *buf, char *fmt, ...); +Rune* runevseprint(Rune *buf, Rune *e, char *fmt, va_list args); +Rune* runevsmprint(char *fmt, va_list args); +int runevsnprint(Rune *buf, int len, char *fmt, va_list args); +char* seprint(char *buf, char *e, char *fmt, ...); 
+char* smprint(char *fmt, ...); +int snprint(char *buf, int len, char *fmt, ...); +int sprint(char *buf, char *fmt, ...); +int vfprint(int fd, char *fmt, va_list args); +char* vseprint(char *buf, char *e, char *fmt, va_list args); +char* vsmprint(char *fmt, va_list args); +int vsnprint(char *buf, int len, char *fmt, va_list args); + +#if defined(__cplusplus) +} +#endif +#endif diff --git a/mk/libfmt/fmtdef.h b/mk/libfmt/fmtdef.h @@ -0,0 +1,105 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ + +/* + * dofmt -- format to a buffer + * the number of characters formatted is returned, + * or -1 if there was an error. + * if the buffer is ever filled, flush is called. + * it should reset the buffer and return whether formatting should continue. + */ + +typedef int (*Fmts)(Fmt*); + +typedef struct Quoteinfo Quoteinfo; +struct Quoteinfo +{ + int quoted; /* if set, string must be quoted */ + int nrunesin; /* number of input runes that can be accepted */ + int nbytesin; /* number of input bytes that can be accepted */ + int nrunesout; /* number of runes that will be generated */ + int nbytesout; /* number of bytes that will be generated */ +}; + +/* Edit .+1,/^$/ |cfn |grep -v static | grep __ */ +double __Inf(int sign); +double __NaN(void); +int __badfmt(Fmt *f); +int __charfmt(Fmt *f); +int __countfmt(Fmt *f); +int __efgfmt(Fmt *fmt); +int __errfmt(Fmt *f); +int __flagfmt(Fmt *f); +int __fmtFdFlush(Fmt *f); +int __fmtcpy(Fmt *f, const void *vm, int n, int sz); +void* __fmtdispatch(Fmt *f, void *fmt, int isrunes); +void * __fmtflush(Fmt *f, void *t, int len); +void __fmtlock(void); +int __fmtpad(Fmt *f, int n); +double __fmtpow10(int n); +int __fmtrcpy(Fmt *f, const void *vm, int n); +void __fmtunlock(void); +int __ifmt(Fmt *f); +int __isInf(double d, int sign); +int __isNaN(double d); +int __needsep(int*, char**); +int __needsquotes(char *s, int *quotelenp); +int __percentfmt(Fmt *f); +void __quotesetup(char *s, Rune *r, int nin, int nout, Quoteinfo *q, 
int sharp, int runesout); +int __quotestrfmt(int runesin, Fmt *f); +int __rfmtpad(Fmt *f, int n); +int __runefmt(Fmt *f); +int __runeneedsquotes(Rune *r, int *quotelenp); +int __runesfmt(Fmt *f); +int __strfmt(Fmt *f); + +#define FMTCHAR(f, t, s, c)\ + do{\ + if(t + 1 > (char*)s){\ + t = (char*)__fmtflush(f, t, 1);\ + if(t != nil)\ + s = (char*)f->stop;\ + else\ + return -1;\ + }\ + *t++ = c;\ + }while(0) + +#define FMTRCHAR(f, t, s, c)\ + do{\ + if(t + 1 > (Rune*)s){\ + t = (Rune*)__fmtflush(f, t, sizeof(Rune));\ + if(t != nil)\ + s = (Rune*)f->stop;\ + else\ + return -1;\ + }\ + *t++ = c;\ + }while(0) + +#define FMTRUNE(f, t, s, r)\ + do{\ + Rune _rune;\ + int _runelen;\ + if(t + UTFmax > (char*)s && t + (_runelen = runelen(r)) > (char*)s){\ + t = (char*)__fmtflush(f, t, _runelen);\ + if(t != nil)\ + s = (char*)f->stop;\ + else\ + return -1;\ + }\ + if(r < Runeself)\ + *t++ = r;\ + else{\ + _rune = r;\ + t += runetochar(t, &_rune);\ + }\ + }while(0) + +#ifdef va_copy +# define VA_COPY(a,b) va_copy(a,b) +# define VA_END(a) va_end(a) +#else +# define VA_COPY(a,b) (a) = (b) +# define VA_END(a) +#endif + diff --git a/mk/libfmt/fmtfd.c b/mk/libfmt/fmtfd.c @@ -0,0 +1,36 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * public routine for final flush of a formatting buffer + * to a file descriptor; returns total char count. 
+ */ +int +fmtfdflush(Fmt *f) +{ + if(__fmtFdFlush(f) <= 0) + return -1; + return f->nfmt; +} + +/* + * initialize an output buffer for buffered printing + */ +int +fmtfdinit(Fmt *f, int fd, char *buf, int size) +{ + f->runes = 0; + f->start = buf; + f->to = buf; + f->stop = buf + size; + f->flush = __fmtFdFlush; + f->farg = (void*)(uintptr_t)fd; + f->flags = 0; + f->nfmt = 0; + fmtlocaleinit(f, nil, nil, nil); + return 0; +} diff --git a/mk/libfmt/fmtfdflush.c b/mk/libfmt/fmtfdflush.c @@ -0,0 +1,22 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <unistd.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * generic routine for flushing a formatting buffer + * to a file descriptor + */ +int +__fmtFdFlush(Fmt *f) +{ + int n; + + n = (char*)f->to - (char*)f->start; + if(n && write((uintptr)f->farg, f->start, n) != n) + return 0; + f->to = f->start; + return 1; +} diff --git a/mk/libfmt/fmtinstall.3 b/mk/libfmt/fmtinstall.3 @@ -0,0 +1,379 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH FMTINSTALL 3 +.SH NAME +fmtinstall, dofmt, dorfmt, fmtprint, fmtvprint, fmtrune, fmtstrcpy, fmtrunestrcpy, fmtfdinit, fmtfdflush, fmtstrinit, fmtstrflush, runefmtstrinit, runefmtstrflush, errfmt \- support for user-defined print formats and output routines +.SH SYNOPSIS +.B #include <utf.h> +.br +.B #include <fmt.h> +.PP +.ft L +.nf +.ta \w' 'u +\w' 'u +\w' 'u +\w' 'u +\w' 'u +typedef struct Fmt Fmt; +struct Fmt{ + uchar runes; /* output buffer is runes or chars? 
*/ + void *start; /* of buffer */ + void *to; /* current place in the buffer */ + void *stop; /* end of the buffer; overwritten if flush fails */ + int (*flush)(Fmt*); /* called when to == stop */ + void *farg; /* to make flush a closure */ + int nfmt; /* num chars formatted so far */ + va_list args; /* args passed to dofmt */ + int r; /* % format Rune */ + int width; + int prec; + ulong flags; +}; + +enum{ + FmtWidth = 1, + FmtLeft = FmtWidth << 1, + FmtPrec = FmtLeft << 1, + FmtSharp = FmtPrec << 1, + FmtSpace = FmtSharp << 1, + FmtSign = FmtSpace << 1, + FmtZero = FmtSign << 1, + FmtUnsigned = FmtZero << 1, + FmtShort = FmtUnsigned << 1, + FmtLong = FmtShort << 1, + FmtVLong = FmtLong << 1, + FmtComma = FmtVLong << 1, + + FmtFlag = FmtComma << 1 +}; +.fi +.PP +.B +.ta \w'\fLchar* 'u + +.PP +.B +int fmtfdinit(Fmt *f, int fd, char *buf, int nbuf); +.PP +.B +int fmtfdflush(Fmt *f); +.PP +.B +int fmtstrinit(Fmt *f); +.PP +.B +char* fmtstrflush(Fmt *f); +.PP +.B +int runefmtstrinit(Fmt *f); +.PP +.B +Rune* runefmtstrflush(Fmt *f); + +.PP +.B +int fmtinstall(int c, int (*fn)(Fmt*)); +.PP +.B +int dofmt(Fmt *f, char *fmt); +.PP +.B +int dorfmt(Fmt*, Rune *fmt); +.PP +.B +int fmtprint(Fmt *f, char *fmt, ...); +.PP +.B +int fmtvprint(Fmt *f, char *fmt, va_list v); +.PP +.B +int fmtrune(Fmt *f, int r); +.PP +.B +int fmtstrcpy(Fmt *f, char *s); +.PP +.B +int fmtrunestrcpy(Fmt *f, Rune *s); +.PP +.B +int errfmt(Fmt *f); +.SH DESCRIPTION +The interface described here allows the construction of custom +.IR print (3) +verbs and output routines. +In essence, they provide access to the workings of the formatted print code. +.PP +The +.IR print (3) +suite maintains its state with a data structure called +.BR Fmt . +A typical call to +.IR print (3) +or its relatives initializes a +.B Fmt +structure, passes it to subsidiary routines to process the output, +and finishes by emitting any saved state recorded in the +.BR Fmt . 
+The details of the +.B Fmt +are unimportant to outside users, except insofar as the general +design influences the interface. +The +.B Fmt +records whether the output is in runes or bytes, +the verb being processed, its precision and width, +and buffering parameters. +Most important, it also records a +.I flush +routine that the library will call if a buffer overflows. +When printing to a file descriptor, the flush routine will +emit saved characters and reset the buffer; when printing +to an allocated string, it will resize the string to receive more output. +The flush routine is nil when printing to fixed-size buffers. +User code need never provide a flush routine; this is done internally +by the library. +.SS Custom output routines +To write a custom output routine, such as an error handler that +formats and prints custom error messages, the output sequence can be run +from outside the library using the routines described here. +There are two main cases: output to an open file descriptor +and output to a string. +.PP +To write to a file descriptor, call +.I fmtfdinit +to initialize the local +.B Fmt +structure +.IR f , +giving the file descriptor +.IR fd , +the buffer +.IR buf , +and its size +.IR nbuf . +Then call +.IR fmtprint +or +.IR fmtvprint +to generate the output. +These behave like +.B fprint +(see +.IR print (3)) +or +.B vfprint +except that the characters are buffered until +.I fmtfdflush +is called and the return value is either 0 or \-1. +A typical example of this sequence appears in the Examples section. +.PP +The same basic sequence applies when outputting to an allocated string: +call +.I fmtstrinit +to initialize the +.BR Fmt , +then call +.I fmtprint +and +.I fmtvprint +to generate the output. +Finally, +.I fmtstrflush +will return the allocated string, which should be freed after use. +To output to a rune string, use +.I runefmtstrinit +and +.IR runefmtstrflush . 
+Regardless of the output style or type, +.I fmtprint +or +.I fmtvprint +generates the characters. +.SS Custom format verbs +.I Fmtinstall +is used to install custom verbs and flags labeled by character +.IR c , +which may be any non-zero Unicode character. +.I Fn +should be declared as +.IP +.EX +int fn(Fmt*) +.EE +.PP +.IB Fp ->r +is the flag or verb character to cause +.I fn +to be called. +In +.IR fn , +.IB fp ->width , +.IB fp ->prec +are the width and precision, and +.IB fp ->flags +the decoded flags for the verb (see +.IR print (3) +for a description of these items). +The standard flag values are: +.B FmtSign +.RB ( + ), +.B FmtLeft +.RB ( - ), +.B FmtSpace +.RB ( '\ ' ), +.B FmtSharp +.RB ( # ), +.B FmtComma +.RB ( , ), +.B FmtLong +.RB ( l ), +.B FmtShort +.RB ( h ), +.B FmtUnsigned +.RB ( u ), +and +.B FmtVLong +.RB ( ll ). +The flag bits +.B FmtWidth +and +.B FmtPrec +identify whether a width and precision were specified. +.PP +.I Fn +is passed a pointer to the +.B Fmt +structure recording the state of the output. +If +.IB fp ->r +is a verb (rather than a flag), +.I fn +should use +.B Fmt->args +to fetch its argument from the list, +then format it, and return zero. +If +.IB fp ->r +is a flag, +.I fn +should return one. +All interpretation of +.IB fp ->width\f1, +.IB fp ->prec\f1, +and +.IB fp ->flags +is left up to the conversion routine. +.I Fmtinstall +returns 0 if the installation succeeds, \-1 if it fails. +.PP +.IR Fmtprint +and +.IR fmtvprint +may be called to +help prepare output in custom conversion routines. +However, these functions clear the width, precision, and flags. +Both functions return 0 for success and \-1 for failure. +.PP +The functions +.I dofmt +and +.I dorfmt +are the underlying formatters; they +use the existing contents of +.B Fmt +and should be called only by sophisticated conversion routines. +These routines return the number of characters (bytes of UTF or runes) +produced. 
+.PP +Some internal functions may be useful to format primitive types. +They honor the width, precision and flags as described in +.IR print (3). +.I Fmtrune +formats a single character +.BR r . +.I Fmtstrcpy +formats a string +.BR s ; +.I fmtrunestrcpy +formats a rune string +.BR s . +.I Errfmt +formats the system error string. +All these routines return zero for successful execution. +Conversion routines that call these functions will work properly +regardless of whether the output is bytes or runes. +.\" .PP +.\" .IR 2c (1) +.\" describes the C directive +.\" .B #pragma +.\" .B varargck +.\" that can be used to provide type-checking for custom print verbs and output routines. +.SH EXAMPLES +This function prints an error message with a variable +number of arguments and then quits. +Compared to the corresponding example in +.IR print (3), +this version uses a smaller buffer, will never truncate +the output message, but might generate multiple +.B write +system calls to produce its output. +.IP +.EX +.ta 6n +6n +6n +6n +6n +6n +6n +6n +6n +#pragma varargck argpos error 1 + +void fatal(char *fmt, ...) +{ + Fmt f; + char buf[64]; + va_list arg; + + fmtfdinit(&f, 1, buf, sizeof buf); + fmtprint(&f, "fatal: "); + va_start(arg, fmt); + fmtvprint(&f, fmt, arg); + va_end(arg); + fmtprint(&f, "\en"); + fmtfdflush(&f); + exits("fatal error"); +} +.EE +.PP +This example adds a verb to print complex numbers. +.IP +.EX +typedef +struct { + double r, i; +} Complex; + +#pragma varargck type "X" Complex + +int +Xfmt(Fmt *f) +{ + Complex c; + + c = va_arg(f->args, Complex); + return fmtprint(f, "(%g,%g)", c.r, c.i); +} + +main(...) +{ + Complex x = (Complex){ 1.5, -2.3 }; + + fmtinstall('X', Xfmt); + print("x = %X\en", x); +} +.EE +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH SEE ALSO +.IR print (3), +.IR utf (7) +.SH DIAGNOSTICS +These routines return negative numbers or nil for errors and set +.IR errstr . 
diff --git a/mk/libfmt/fmtlocale.c b/mk/libfmt/fmtlocale.c @@ -0,0 +1,55 @@ +/* Copyright (c) 2004 Google Inc.; see LICENSE */ + +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * Fill in the internationalization stuff in the State structure. + * For nil arguments, provide the sensible defaults: + * decimal is a period + * thousands separator is a comma + * thousands are marked every three digits + */ +void +fmtlocaleinit(Fmt *f, char *decimal, char *thousands, char *grouping) +{ + if(decimal == nil || decimal[0] == '\0') + decimal = "."; + if(thousands == nil) + thousands = ","; + if(grouping == nil) + grouping = "\3"; + f->decimal = decimal; + f->thousands = thousands; + f->grouping = grouping; +} + +/* + * We are about to emit a digit in e.g. %'d. If that digit would + * overflow a thousands (e.g.) grouping, tell the caller to emit + * the thousands separator. Always advance the digit counter + * and pointer into the grouping descriptor. + */ +int +__needsep(int *ndig, char **grouping) +{ + int group; + + (*ndig)++; + group = *(unsigned char*)*grouping; + /* CHAR_MAX means no further grouping. 
\0 means we got the empty string */ + if(group == 0xFF || group == 0x7f || group == 0x00) + return 0; + if(*ndig > group){ + /* if we're at end of string, continue with this grouping; else advance */ + if((*grouping)[1] != '\0') + (*grouping)++; + *ndig = 1; + return 1; + } + return 0; +} + diff --git a/mk/libfmt/fmtlock.c b/mk/libfmt/fmtlock.c @@ -0,0 +1,15 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +void +__fmtlock(void) +{ +} + +void +__fmtunlock(void) +{ +} diff --git a/mk/libfmt/fmtnull.c b/mk/libfmt/fmtnull.c @@ -0,0 +1,33 @@ +/* Copyright (c) 2004 Google Inc.; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * Absorb output without using resources. + */ +static Rune nullbuf[32]; + +static int +__fmtnullflush(Fmt *f) +{ + f->to = nullbuf; + f->nfmt = 0; + return 0; +} + +int +fmtnullinit(Fmt *f) +{ + memset(f, 0, sizeof *f); + f->runes = 1; + f->start = nullbuf; + f->to = nullbuf; + f->stop = nullbuf+nelem(nullbuf); + f->flush = __fmtnullflush; + fmtlocaleinit(f, nil, nil, nil); + return 0; +} + diff --git a/mk/libfmt/fmtprint.c b/mk/libfmt/fmtprint.c @@ -0,0 +1,36 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * format a string into the output buffer + * designed for formats which themselves call fmt, + * but ignore any width flags + */ +int +fmtprint(Fmt *f, char *fmt, ...) 
+{ + va_list va; + int n; + + f->flags = 0; + f->width = 0; + f->prec = 0; + VA_COPY(va, f->args); + VA_END(f->args); + va_start(f->args, fmt); + n = dofmt(f, fmt); + va_end(f->args); + f->flags = 0; + f->width = 0; + f->prec = 0; + VA_COPY(f->args,va); + VA_END(va); + if(n >= 0) + return 0; + return n; +} + diff --git a/mk/libfmt/fmtquote.c b/mk/libfmt/fmtquote.c @@ -0,0 +1,259 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * How many bytes of output UTF will be produced by quoting (if necessary) this string? + * How many runes? How much of the input will be consumed? + * The parameter q is filled in by __quotesetup. + * The string may be UTF or Runes (s or r). + * Return count does not include NUL. + * Terminate the scan at the first of: + * NUL in input + * count exceeded in input + * count exceeded on output + * *ninp is set to number of input bytes accepted. + * nin may be <0 initially, to avoid checking input by count. 
+ */ +void +__quotesetup(char *s, Rune *r, int nin, int nout, Quoteinfo *q, int sharp, int runesout) +{ + int w; + Rune c; + + q->quoted = 0; + q->nbytesout = 0; + q->nrunesout = 0; + q->nbytesin = 0; + q->nrunesin = 0; + if(sharp || nin==0 || (s && *s=='\0') || (r && *r=='\0')){ + if(nout < 2) + return; + q->quoted = 1; + q->nbytesout = 2; + q->nrunesout = 2; + } + for(; nin!=0; nin--){ + if(s) + w = chartorune(&c, s); + else{ + c = *r; + w = runelen(c); + } + + if(c == '\0') + break; + if(runesout){ + if(q->nrunesout+1 > nout) + break; + }else{ + if(q->nbytesout+w > nout) + break; + } + + if((c <= L' ') || (c == L'\'') || (fmtdoquote!=nil && fmtdoquote(c))){ + if(!q->quoted){ + if(runesout){ + if(1+q->nrunesout+1+1 > nout) /* no room for quotes */ + break; + }else{ + if(1+q->nbytesout+w+1 > nout) /* no room for quotes */ + break; + } + q->nrunesout += 2; /* include quotes */ + q->nbytesout += 2; /* include quotes */ + q->quoted = 1; + } + if(c == '\'') { + if(runesout){ + if(1+q->nrunesout+1 > nout) /* no room for quotes */ + break; + }else{ + if(1+q->nbytesout+w > nout) /* no room for quotes */ + break; + } + q->nbytesout++; + q->nrunesout++; /* quotes reproduce as two characters */ + } + } + + /* advance input */ + if(s) + s += w; + else + r++; + q->nbytesin += w; + q->nrunesin++; + + /* advance output */ + q->nbytesout += w; + q->nrunesout++; + +#ifndef PLAN9PORT + /* ANSI requires precision in bytes, not Runes. 
*/ + nin-= w-1; /* and then n-- in the loop */ +#endif + } +} + +static int +qstrfmt(char *sin, Rune *rin, Quoteinfo *q, Fmt *f) +{ + Rune r, *rm, *rme; + char *t, *s, *m, *me; + Rune *rt, *rs; + ulong fl; + int nc, w; + + m = sin; + me = m + q->nbytesin; + rm = rin; + rme = rm + q->nrunesin; + + fl = f->flags; + w = 0; + if(fl & FmtWidth) + w = f->width; + if(f->runes){ + if(!(fl & FmtLeft) && __rfmtpad(f, w - q->nrunesout) < 0) + return -1; + }else{ + if(!(fl & FmtLeft) && __fmtpad(f, w - q->nbytesout) < 0) + return -1; + } + t = (char*)f->to; + s = (char*)f->stop; + rt = (Rune*)f->to; + rs = (Rune*)f->stop; + if(f->runes) + FMTRCHAR(f, rt, rs, '\''); + else + FMTRUNE(f, t, s, '\''); + for(nc = q->nrunesin; nc > 0; nc--){ + if(sin){ + r = *(uchar*)m; + if(r < Runeself) + m++; + else if((me - m) >= UTFmax || fullrune(m, me-m)) + m += chartorune(&r, m); + else + break; + }else{ + if(rm >= rme) + break; + r = *(uchar*)rm++; + } + if(f->runes){ + FMTRCHAR(f, rt, rs, r); + if(r == '\'') + FMTRCHAR(f, rt, rs, r); + }else{ + FMTRUNE(f, t, s, r); + if(r == '\'') + FMTRUNE(f, t, s, r); + } + } + + if(f->runes){ + FMTRCHAR(f, rt, rs, '\''); + USED(rs); + f->nfmt += rt - (Rune *)f->to; + f->to = rt; + if(fl & FmtLeft && __rfmtpad(f, w - q->nrunesout) < 0) + return -1; + }else{ + FMTRUNE(f, t, s, '\''); + USED(s); + f->nfmt += t - (char *)f->to; + f->to = t; + if(fl & FmtLeft && __fmtpad(f, w - q->nbytesout) < 0) + return -1; + } + return 0; +} + +int +__quotestrfmt(int runesin, Fmt *f) +{ + int nin, outlen; + Rune *r; + char *s; + Quoteinfo q; + + nin = -1; + if(f->flags&FmtPrec) + nin = f->prec; + if(runesin){ + r = va_arg(f->args, Rune *); + s = nil; + }else{ + s = va_arg(f->args, char *); + r = nil; + } + if(!s && !r) + return __fmtcpy(f, (void*)"<nil>", 5, 5); + + if(f->flush) + outlen = 0x7FFFFFFF; /* if we can flush, no output limit */ + else if(f->runes) + outlen = (Rune*)f->stop - (Rune*)f->to; + else + outlen = (char*)f->stop - (char*)f->to; + + __quotesetup(s, r, 
nin, outlen, &q, f->flags&FmtSharp, f->runes); +/*print("bytes in %d bytes out %d runes in %d runesout %d\n", q.nbytesin, q.nbytesout, q.nrunesin, q.nrunesout); */ + + if(runesin){ + if(!q.quoted) + return __fmtrcpy(f, r, q.nrunesin); + return qstrfmt(nil, r, &q, f); + } + + if(!q.quoted) + return __fmtcpy(f, s, q.nrunesin, q.nbytesin); + return qstrfmt(s, nil, &q, f); +} + +int +quotestrfmt(Fmt *f) +{ + return __quotestrfmt(0, f); +} + +int +quoterunestrfmt(Fmt *f) +{ + return __quotestrfmt(1, f); +} + +void +quotefmtinstall(void) +{ + fmtinstall('q', quotestrfmt); + fmtinstall('Q', quoterunestrfmt); +} + +int +__needsquotes(char *s, int *quotelenp) +{ + Quoteinfo q; + + __quotesetup(s, nil, -1, 0x7FFFFFFF, &q, 0, 0); + *quotelenp = q.nbytesout; + + return q.quoted; +} + +int +__runeneedsquotes(Rune *r, int *quotelenp) +{ + Quoteinfo q; + + __quotesetup(nil, r, -1, 0x7FFFFFFF, &q, 0, 0); + *quotelenp = q.nrunesout; + + return q.quoted; +} diff --git a/mk/libfmt/fmtrune.c b/mk/libfmt/fmtrune.c @@ -0,0 +1,28 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +fmtrune(Fmt *f, int r) +{ + Rune *rt; + char *t; + int n; + + if(f->runes){ + rt = (Rune*)f->to; + FMTRCHAR(f, rt, f->stop, r); + f->to = rt; + n = 1; + }else{ + t = (char*)f->to; + FMTRUNE(f, t, f->stop, r); + n = t - (char*)f->to; + f->to = t; + } + f->nfmt += n; + return 0; +} diff --git a/mk/libfmt/fmtstr.c b/mk/libfmt/fmtstr.c @@ -0,0 +1,16 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdlib.h> +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +char* +fmtstrflush(Fmt *f) +{ + if(f->start == nil) + return nil; + *(char*)f->to = '\0'; + f->to = f->start; + return (char*)f->start; +} diff --git a/mk/libfmt/fmtvprint.c b/mk/libfmt/fmtvprint.c @@ -0,0 +1,37 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ 
+#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + + +/* + * format a string into the output buffer + * designed for formats which themselves call fmt, + * but ignore any width flags + */ +int +fmtvprint(Fmt *f, char *fmt, va_list args) +{ + va_list va; + int n; + + f->flags = 0; + f->width = 0; + f->prec = 0; + VA_COPY(va,f->args); + VA_END(f->args); + VA_COPY(f->args,args); + n = dofmt(f, fmt); + f->flags = 0; + f->width = 0; + f->prec = 0; + VA_END(f->args); + VA_COPY(f->args,va); + VA_END(va); + if(n >= 0) + return 0; + return n; +} + diff --git a/mk/libfmt/fprint.c b/mk/libfmt/fprint.c @@ -0,0 +1,17 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +fprint(int fd, char *fmt, ...) +{ + int n; + va_list args; + + va_start(args, fmt); + n = vfprint(fd, fmt, args); + va_end(args); + return n; +} diff --git a/mk/libfmt/mkfile b/mk/libfmt/mkfile @@ -0,0 +1,49 @@ +LIB = libfmt.a +LOBJ = \ + dofmt.o\ + dorfmt.o\ + errfmt.o\ + fltfmt.o\ + fmt.o\ + fmtfd.o\ + fmtfdflush.o\ + fmtlocale.o\ + fmtlock.o\ + fmtnull.o\ + fmtprint.o\ + fmtquote.o\ + fmtrune.o\ + fmtstr.o\ + fmtvprint.o\ + fprint.o\ + print.o\ + runefmtstr.o\ + runeseprint.o\ + runesmprint.o\ + runesnprint.o\ + runesprint.o\ + runevseprint.o\ + runevsmprint.o\ + runevsnprint.o\ + seprint.o\ + smprint.o\ + snprint.o\ + sprint.o\ + strtod.o\ + vfprint.o\ + vseprint.o\ + vsmprint.o\ + vsnprint.o\ + charstod.o\ + pow10.o\ + nan64.o + +LOCAL_CFLAGS = -I"$PREFIX"/include -I. 
+CLEAN_FILES = test test.o +DEPS = libutf + +<$mkbuild/mk.default + +test: deps $LIB test.o + $CC -o test test.o $LIB $CFLAGS $LDFLAGS $LOCAL_CFLAGS -L"$PREFIX"/lib -L../libutf -lutf + diff --git a/mk/libfmt/nan.h b/mk/libfmt/nan.h @@ -0,0 +1,4 @@ +extern double __NaN(void); +extern double __Inf(int); +extern int __isNaN(double); +extern int __isInf(double, int); diff --git a/mk/libfmt/nan64.c b/mk/libfmt/nan64.c @@ -0,0 +1,78 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ + +/* + * 64-bit IEEE not-a-number routines. + * This is big/little-endian portable assuming that + * the 64-bit doubles and 64-bit integers have the + * same byte ordering. + */ + +#include "plan9.h" +#include <assert.h> +#include "fmt.h" +#include "fmtdef.h" + +static uvlong uvnan = ((uvlong)0x7FF00000<<32)|0x00000001; +static uvlong uvinf = ((uvlong)0x7FF00000<<32)|0x00000000; +static uvlong uvneginf = ((uvlong)0xFFF00000<<32)|0x00000000; + +/* gcc sees through the obvious casts. */ +static uvlong +d2u(double d) +{ + union { + uvlong v; + double d; + } u; + assert(sizeof(u.d) == sizeof(u.v)); + u.d = d; + return u.v; +} + +static double +u2d(uvlong v) +{ + union { + uvlong v; + double d; + } u; + assert(sizeof(u.d) == sizeof(u.v)); + u.v = v; + return u.d; +} + +double +__NaN(void) +{ + return u2d(uvnan); +} + +int +__isNaN(double d) +{ + uvlong x; + + x = d2u(d); + /* IEEE 754: exponent bits 0x7FF and non-zero mantissa */ + return (x&uvinf) == uvinf && (x&~uvneginf) != 0; +} + +double +__Inf(int sign) +{ + return u2d(sign < 0 ? 
uvneginf : uvinf); +} + +int +__isInf(double d, int sign) +{ + uvlong x; + + x = d2u(d); + if(sign == 0) + return x==uvinf || x==uvneginf; + else if(sign > 0) + return x==uvinf; + else + return x==uvneginf; +} diff --git a/mk/libfmt/plan9.h b/mk/libfmt/plan9.h @@ -0,0 +1,38 @@ +#include <inttypes.h> + +/* + * compiler directive on Plan 9 + */ +#ifndef USED +#define USED(x) if(x);else +#endif + +/* + * easiest way to make sure these are defined + */ +#define uchar _fmtuchar +#define ushort _fmtushort +#define uint _fmtuint +#define ulong _fmtulong +#define vlong _fmtvlong +#define uvlong _fmtuvlong +#define uintptr _fmtuintptr + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; +typedef unsigned long long uvlong; +typedef long long vlong; +typedef uintptr_t uintptr; + +/* + * nil cannot be ((void*)0) on ANSI C, + * because it is used for function pointers + */ +#undef nil +#define nil 0 + +#undef nelem +#define nelem(x) (sizeof (x)/sizeof (x)[0]) + diff --git a/mk/libfmt/pow10.c b/mk/libfmt/pow10.c @@ -0,0 +1,45 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * this table might overflow 127-bit exponent representations. + * in that case, truncate it after 1.0e38. + * it is important to get all one can from this + * routine since it is used in atof to scale numbers. + * the presumption is that C converts fp numbers better + * than multiplication of lower powers of 10. 
+ */ + +static +double tab[] = +{ + 1.0e0, 1.0e1, 1.0e2, 1.0e3, 1.0e4, 1.0e5, 1.0e6, 1.0e7, 1.0e8, 1.0e9, + 1.0e10,1.0e11,1.0e12,1.0e13,1.0e14,1.0e15,1.0e16,1.0e17,1.0e18,1.0e19, + 1.0e20,1.0e21,1.0e22,1.0e23,1.0e24,1.0e25,1.0e26,1.0e27,1.0e28,1.0e29, + 1.0e30,1.0e31,1.0e32,1.0e33,1.0e34,1.0e35,1.0e36,1.0e37,1.0e38,1.0e39, + 1.0e40,1.0e41,1.0e42,1.0e43,1.0e44,1.0e45,1.0e46,1.0e47,1.0e48,1.0e49, + 1.0e50,1.0e51,1.0e52,1.0e53,1.0e54,1.0e55,1.0e56,1.0e57,1.0e58,1.0e59, + 1.0e60,1.0e61,1.0e62,1.0e63,1.0e64,1.0e65,1.0e66,1.0e67,1.0e68,1.0e69, +}; + +double +__fmtpow10(int n) +{ + int m; + + if(n < 0) { + n = -n; + if(n < (int)(sizeof(tab)/sizeof(tab[0]))) + return 1/tab[n]; + m = n/2; + return __fmtpow10(-m) * __fmtpow10(m-n); + } + if(n < (int)(sizeof(tab)/sizeof(tab[0]))) + return tab[n]; + m = n/2; + return __fmtpow10(m) * __fmtpow10(n-m); +} diff --git a/mk/libfmt/print.3 b/mk/libfmt/print.3 @@ -0,0 +1,482 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.\" diffs from /usr/local/plan9/man/man3/print.3: +.\" +.\" - include different headers +.\" - drop reference to bio(3) +.\" - change exits to exit +.\" - text about unsigned verbs +.\" - source pointer +.\" +.TH PRINT 3 +.SH NAME +print, fprint, sprint, snprint, seprint, smprint, runesprint, runesnprint, runeseprint, runesmprint, vfprint, vsnprint, vseprint, vsmprint, runevsnprint, runevseprint, runevsmprint \- print formatted output +.SH SYNOPSIS +.B #include <utf.h> +.PP +.B #include <fmt.h> +.PP +.ta \w'\fLchar* 'u +.B +int print(char *format, ...) +.PP +.B +int fprint(int fd, char *format, ...) +.PP +.B +int sprint(char *s, char *format, ...) +.PP +.B +int snprint(char *s, int len, char *format, ...) +.PP +.B +char* seprint(char *s, char *e, char *format, ...) +.PP +.B +char* smprint(char *format, ...) +.PP +.B +int runesprint(Rune *s, char *format, ...) +.PP +.B +int runesnprint(Rune *s, int len, char *format, ...) +.PP +.B +Rune* runeseprint(Rune *s, Rune *e, char *format, ...) 
+.PP +.B +Rune* runesmprint(char *format, ...) +.PP +.B +int vfprint(int fd, char *format, va_list v) +.PP +.B +int vsnprint(char *s, int len, char *format, va_list v) +.PP +.B +char* vseprint(char *s, char *e, char *format, va_list v) +.PP +.B +char* vsmprint(char *format, va_list v) +.PP +.B +int runevsnprint(Rune *s, int len, char *format, va_list v) +.PP +.B +Rune* runevseprint(Rune *s, Rune *e, char *format, va_list v) +.PP +.B +Rune* runevsmprint(char *format, va_list v) +.PP +.B +.SH DESCRIPTION +.I Print +writes text to the standard output. +.I Fprint +writes to the named output +file descriptor: +a buffered form +is described in +.IR bio (3). +.I Sprint +places text +followed by the NUL character +.RB ( \e0 ) +in consecutive bytes starting at +.IR s ; +it is the user's responsibility to ensure that +enough storage is available. +Each function returns the number of bytes +transmitted (not including the NUL +in the case of +.IR sprint ), +or +a negative value if an output error was encountered. +.PP +.I Snprint +is like +.IR sprint , +but will not place more than +.I len +bytes in +.IR s . +Its result is always NUL-terminated and holds the maximal +number of complete UTF-8 characters that can fit. +.I Seprint +is like +.IR snprint , +except that the end is indicated by a pointer +.I e +rather than a count and the return value points to the terminating NUL of the +resulting string. +.I Smprint +is like +.IR sprint , +except that it prints into and returns a string of the required length, which is +allocated by +.IR malloc (3). +.PP +The routines +.IR runesprint , +.IR runesnprint , +.IR runeseprint , +and +.I runesmprint +are the same as +.IR sprint , +.IR snprint , +.IR seprint +and +.I smprint +except that their output is rune strings instead of byte strings. 
+.PP +Finally, the routines +.IR vfprint , +.IR vsnprint , +.IR vseprint , +.IR vsmprint , +.IR runevsnprint , +.IR runevseprint , +and +.I runevsmprint +are like their +.BR v-less +relatives except they take as arguments a +.B va_list +parameter, so they can be called within a variadic function. +The Example section shows a representative usage. +.PP +Each of these functions +converts, formats, and prints its +trailing arguments +under control of a +.IR format +string. +The +format +contains two types of objects: +plain characters, which are simply copied to the +output stream, +and conversion specifications, +each of which results in fetching of +zero or more +arguments. +The results are undefined if there are arguments of the +wrong type or too few +arguments for the format. +If the format is exhausted while +arguments remain, the excess +is ignored. +.PP +Each conversion specification has the following format: +.IP +.B "% [flags] verb +.PP +The verb is a single character and each flag is a single character or a +(decimal) numeric string. +Up to two numeric strings may be used; +the first is called +.IR width , +the second +.IR precision . +A period can be used to separate them, and if the period is +present then +.I width +and +.I precision +are taken to be zero if missing, otherwise they are `omitted'. +Either or both of the numbers may be replaced with the character +.BR * , +meaning that the actual number will be obtained from the argument list +as an integer. +The flags and numbers are arguments to +the +.I verb +described below. +.PP +The numeric verbs +.BR d , +.BR i , +.BR u , +.BR o , +.BR b , +.BR x , +and +.B X +format their arguments in decimal, decimal, +unsigned decimal, octal, binary, hexadecimal, and upper case hexadecimal. 
+Each interprets the flags +.BR 0 , +.BR h , +.BR hh , +.BR l , +.BR + , +.BR - , +.BR , , +and +.B # +to mean pad with zeros, +short, byte, long, always print a sign, left justified, commas every three digits, +and alternate format. +Also, a space character in the flag +position is like +.BR + , +but prints a space instead of a plus sign for non-negative values. +If neither +short nor long is specified, +then the argument is an +.BR int . +If an unsigned verb is specified, +then the argument is interpreted as a +positive number and no sign is output; +space and +.B + +flags are ignored for unsigned verbs. +If two +.B l +flags are given, +then the argument is interpreted as a +.B vlong +(usually an 8-byte, sometimes a 4-byte integer). +If +.I precision +is not omitted, the number is padded on the left with zeros +until at least +.I precision +digits appear. +If +.I precision +is explicitly 0, and the number is 0, +no digits are generated, and alternate formatting +does not apply. +Then, if alternate format is specified, +for +.B o +conversion, the number is preceded by a +.B 0 +if it doesn't already begin with one. +For non-zero numbers and +.B x +conversion, the number is preceded by +.BR 0x ; +for +.B X +conversion, the number is preceded by +.BR 0X . +Finally, if +.I width +is not omitted, the number is padded on the left (or right, if +left justification is specified) with enough blanks to +make the field at least +.I width +characters long. +.PP +The floating point verbs +.BR f , +.BR e , +.BR E , +.BR g , +and +.B G +take a +.B double +argument. +Each interprets the flags +.BR 0 , +.BR L , +.BR + , +.BR - , +and +.B # +to mean pad with zeros, +long double argument, +always print a sign, +left justified, +and +alternate format. +.I Width +is the minimum field width and, +if the converted value takes up less than +.I width +characters, it is padded on the left (or right, if `left justified') +with spaces. 
+.I Precision +is the number of digits that are converted after the decimal place for +.BR e , +.BR E , +and +.B f +conversions, +and +.I precision +is the maximum number of significant digits for +.B g +and +.B G +conversions. +The +.B f +verb produces output of the form +.RB [ - ] digits [ .digits\fR]. +.B E +conversion appends an exponent +.BR E [ - ] digits , +and +.B e +conversion appends an exponent +.BR e [ - ] digits . +The +.B g +verb will output the argument in either +.B e +or +.B f +with the goal of producing the smallest output. +Also, trailing zeros are omitted from the fraction part of +the output, and a trailing decimal point appears only if it is followed +by a digit. +The +.B G +verb is similar, but uses +.B E +format instead of +.BR e . +When alternate format is specified, the result will always contain a decimal point, +and for +.B g +and +.B G +conversions, trailing zeros are not removed. +.PP +The +.B s +verb copies a string +(pointer to +.BR char ) +to the output. +The number of characters copied +.RI ( n ) +is the minimum +of the size of the string and +.IR precision . +These +.I n +characters are justified within a field of +.I width +characters as described above. +If a +.I precision +is given, it is safe for the string not to be nul-terminated +as long as it is at least +.I precision +characters (not bytes!) long. +The +.B S +verb is similar, but it interprets its pointer as an array +of runes (see +.IR utf (7)); +the runes are converted to +.SM UTF +before output. +.PP +The +.B c +verb copies a single +.B char +(promoted to +.BR int ) +justified within a field of +.I width +characters as described above. +The +.B C +verb is similar, but works on runes. +.PP +The +.B p +verb formats a pointer value. +At the moment, it is a synonym for +.BR x , +but that will change if pointers and integers are different sizes. 
+.PP +The +.B r +verb takes no arguments; it copies the error string returned by a call to +.IR strerror (3) +with an argument of +.IR errno . +.PP +Custom verbs may be installed using +.IR fmtinstall (3). +.SH EXAMPLE +This function prints an error message with a variable +number of arguments and then quits. +.IP +.EX +.ta 6n +6n +6n +void fatal(char *msg, ...) +{ + char buf[1024], *out; + va_list arg; + + out = seprint(buf, buf+sizeof buf, "Fatal error: "); + va_start(arg, msg); + out = vseprint(out, buf+sizeof buf, msg, arg); + va_end(arg); + write(2, buf, out-buf); + exit(1); +} +.EE +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH SEE ALSO +.IR fmtinstall (3), +.IR fprintf (3), +.IR utf (7) +.SH DIAGNOSTICS +Routines that write to a file descriptor or call +.IR malloc +set +.IR errstr . +.SH BUGS +The formatting is close to that specified for ANSI +.IR fprintf (3); +the main difference is that +.B b +and +.B r +are not in ANSI and some +.B C9X +verbs and syntax are missing. +Also, and distinctly not a bug, +.I print +and friends generate +.SM UTF +rather than +.SM ASCII. +.PP +There is no +.IR runeprint , +.IR runefprint , +etc. because runes are byte-order dependent and should not be written directly to a file; use the +UTF output of +.I print +or +.I fprint +instead. +Also, +.I sprint +is deprecated for safety reasons; use +.IR snprint , +.IR seprint , +or +.I smprint +instead. +Safety also precludes the existence of +.IR runesprint . diff --git a/mk/libfmt/print.c b/mk/libfmt/print.c @@ -0,0 +1,17 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +print(char *fmt, ...) 
+{ + int n; + va_list args; + + va_start(args, fmt); + n = vfprint(1, fmt, args); + va_end(args); + return n; +} diff --git a/mk/libfmt/runefmtstr.c b/mk/libfmt/runefmtstr.c @@ -0,0 +1,16 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <stdlib.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +Rune* +runefmtstrflush(Fmt *f) +{ + if(f->start == nil) + return nil; + *(Rune*)f->to = '\0'; + f->to = f->start; + return f->start; +} diff --git a/mk/libfmt/runeseprint.c b/mk/libfmt/runeseprint.c @@ -0,0 +1,18 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +Rune* +runeseprint(Rune *buf, Rune *e, char *fmt, ...) +{ + Rune *p; + va_list args; + + va_start(args, fmt); + p = runevseprint(buf, e, fmt, args); + va_end(args); + return p; +} diff --git a/mk/libfmt/runesmprint.c b/mk/libfmt/runesmprint.c @@ -0,0 +1,18 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +Rune* +runesmprint(char *fmt, ...) +{ + va_list args; + Rune *p; + + va_start(args, fmt); + p = runevsmprint(fmt, args); + va_end(args); + return p; +} diff --git a/mk/libfmt/runesnprint.c b/mk/libfmt/runesnprint.c @@ -0,0 +1,19 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +runesnprint(Rune *buf, int len, char *fmt, ...) 
+{ + int n; + va_list args; + + va_start(args, fmt); + n = runevsnprint(buf, len, fmt, args); + va_end(args); + return n; +} + diff --git a/mk/libfmt/runesprint.c b/mk/libfmt/runesprint.c @@ -0,0 +1,18 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +runesprint(Rune *buf, char *fmt, ...) +{ + int n; + va_list args; + + va_start(args, fmt); + n = runevsnprint(buf, 256, fmt, args); + va_end(args); + return n; +} diff --git a/mk/libfmt/runevseprint.c b/mk/libfmt/runevseprint.c @@ -0,0 +1,29 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +Rune* +runevseprint(Rune *buf, Rune *e, char *fmt, va_list args) +{ + Fmt f; + + if(e <= buf) + return nil; + f.runes = 1; + f.start = buf; + f.to = buf; + f.stop = e - 1; + f.flush = nil; + f.farg = nil; + f.nfmt = 0; + VA_COPY(f.args,args); + fmtlocaleinit(&f, nil, nil, nil); + dofmt(&f, fmt); + VA_END(f.args); + *(Rune*)f.to = '\0'; + return (Rune*)f.to; +} + diff --git a/mk/libfmt/runevsmprint.c b/mk/libfmt/runevsmprint.c @@ -0,0 +1,86 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +/* + * Plan 9 port version must include libc.h in order to + * get Plan 9 debugging malloc, which sometimes returns + * different pointers than the standard malloc. 
+ */ +#ifdef PLAN9PORT +#include <u.h> +#include <libc.h> +#include "fmtdef.h" +#else +#include <stdlib.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" +#endif + +static int +runeFmtStrFlush(Fmt *f) +{ + Rune *s; + int n; + + if(f->start == nil) + return 0; + n = (uintptr)f->farg; + n *= 2; + s = (Rune*)f->start; + f->start = realloc(s, sizeof(Rune)*n); + if(f->start == nil){ + f->farg = nil; + f->to = nil; + f->stop = nil; + free(s); + return 0; + } + f->farg = (void*)(uintptr)n; + f->to = (Rune*)f->start + ((Rune*)f->to - s); + f->stop = (Rune*)f->start + n - 1; + return 1; +} + +int +runefmtstrinit(Fmt *f) +{ + int n; + + memset(f, 0, sizeof *f); + f->runes = 1; + n = 32; + f->start = malloc(sizeof(Rune)*n); + if(f->start == nil) + return -1; + f->to = f->start; + f->stop = (Rune*)f->start + n - 1; + f->flush = runeFmtStrFlush; + f->farg = (void*)(uintptr)n; + f->nfmt = 0; + fmtlocaleinit(f, nil, nil, nil); + return 0; +} + +/* + * print into an allocated string buffer + */ +Rune* +runevsmprint(char *fmt, va_list args) +{ + Fmt f; + int n; + + if(runefmtstrinit(&f) < 0) + return nil; + VA_COPY(f.args,args); + n = dofmt(&f, fmt); + VA_END(f.args); + if(f.start == nil) + return nil; + if(n < 0){ + free(f.start); + return nil; + } + *(Rune*)f.to = '\0'; + return (Rune*)f.start; +} diff --git a/mk/libfmt/runevsnprint.c b/mk/libfmt/runevsnprint.c @@ -0,0 +1,28 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +runevsnprint(Rune *buf, int len, char *fmt, va_list args) +{ + Fmt f; + + if(len <= 0) + return -1; + f.runes = 1; + f.start = buf; + f.to = buf; + f.stop = buf + len - 1; + f.flush = nil; + f.farg = nil; + f.nfmt = 0; + VA_COPY(f.args,args); + fmtlocaleinit(&f, nil, nil, nil); + dofmt(&f, fmt); + VA_END(f.args); + *(Rune*)f.to = '\0'; + return (Rune*)f.to - buf; +} diff --git a/mk/libfmt/seprint.c 
b/mk/libfmt/seprint.c @@ -0,0 +1,17 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +char* +seprint(char *buf, char *e, char *fmt, ...) +{ + char *p; + va_list args; + + va_start(args, fmt); + p = vseprint(buf, e, fmt, args); + va_end(args); + return p; +} diff --git a/mk/libfmt/smprint.c b/mk/libfmt/smprint.c @@ -0,0 +1,17 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +char* +smprint(char *fmt, ...) +{ + va_list args; + char *p; + + va_start(args, fmt); + p = vsmprint(fmt, args); + va_end(args); + return p; +} diff --git a/mk/libfmt/snprint.c b/mk/libfmt/snprint.c @@ -0,0 +1,18 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +snprint(char *buf, int len, char *fmt, ...) +{ + int n; + va_list args; + + va_start(args, fmt); + n = vsnprint(buf, len, fmt, args); + va_end(args); + return n; +} + diff --git a/mk/libfmt/sprint.c b/mk/libfmt/sprint.c @@ -0,0 +1,30 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <fmt.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +sprint(char *buf, char *fmt, ...) +{ + int n; + uint len; + va_list args; + + len = 1<<30; /* big number, but sprint is deprecated anyway */ + /* + * on PowerPC, the stack is near the top of memory, so + * we must be sure not to overflow a 32-bit pointer. + * + * careful! gcc-4.2 assumes buf+len < buf can never be true and + * optimizes the test away. casting to uintptr works around this bug. 
+ */ + if((uintptr)buf+len < (uintptr)buf) + len = -(uintptr)buf-1; + + va_start(args, fmt); + n = vsnprint(buf, len, fmt, args); + va_end(args); + return n; +} diff --git a/mk/libfmt/strtod.c b/mk/libfmt/strtod.c @@ -0,0 +1,520 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdlib.h> +#include <math.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +static ulong +umuldiv(ulong a, ulong b, ulong c) +{ + double d; + + d = ((double)a * (double)b) / (double)c; + if(d >= 4294967295.) + d = 4294967295.; + return (ulong)d; +} + +/* + * This routine will convert to arbitrary precision + * floating point entirely in multi-precision fixed. + * The answer is the closest floating point number to + * the given decimal number. Exactly half way are + * rounded ala ieee rules. + * Method is to scale input decimal between .500 and .999... + * with external power of 2, then binary search for the + * closest mantissa to this decimal number. + * Nmant is the required precision. (53 for ieee dp) + * Nbits is the max number of bits/word. (must be <= 28) + * Prec is calculated - the number of words of fixed mantissa. + */ +enum +{ + Nbits = 28, /* bits safely represented in a ulong */ + Nmant = 53, /* bits of precision required */ + Prec = (Nmant+Nbits+1)/Nbits, /* words of Nbits each to represent mantissa */ + Sigbit = 1<<(Prec*Nbits-Nmant), /* first significant bit of Prec-th word */ + Ndig = 1500, + One = (ulong)(1<<Nbits), + Half = (ulong)(One>>1), + Maxe = 310, + + Fsign = 1<<0, /* found - */ + Fesign = 1<<1, /* found e- */ + Fdpoint = 1<<2, /* found . */ + + S0 = 0, /* _ _S0 +S1 #S2 .S3 */ + S1, /* _+ #S2 .S3 */ + S2, /* _+# #S2 .S4 eS5 */ + S3, /* _+. 
#S4 */ + S4, /* _+#.# #S4 eS5 */ + S5, /* _+#.#e +S6 #S7 */ + S6, /* _+#.#e+ #S7 */ + S7 /* _+#.#e+# #S7 */ +}; + +static int xcmp(char*, char*); +static int fpcmp(char*, ulong*); +static void frnorm(ulong*); +static void divascii(char*, int*, int*, int*); +static void mulascii(char*, int*, int*, int*); + +typedef struct Tab Tab; +struct Tab +{ + int bp; + int siz; + char* cmp; +}; + +double +fmtstrtod(const char *as, char **aas) +{ + int na, ex, dp, bp, c, i, flag, state; + ulong low[Prec], hig[Prec], mid[Prec]; + double d; + char *s, a[Ndig]; + + flag = 0; /* Fsign, Fesign, Fdpoint */ + na = 0; /* number of digits of a[] */ + dp = 0; /* na of decimal point */ + ex = 0; /* exonent */ + + state = S0; + for(s=(char*)as;; s++) { + c = *s; + if(c >= '0' && c <= '9') { + switch(state) { + case S0: + case S1: + case S2: + state = S2; + break; + case S3: + case S4: + state = S4; + break; + + case S5: + case S6: + case S7: + state = S7; + ex = ex*10 + (c-'0'); + continue; + } + if(na == 0 && c == '0') { + dp--; + continue; + } + if(na < Ndig-50) + a[na++] = c; + continue; + } + switch(c) { + case '\t': + case '\n': + case '\v': + case '\f': + case '\r': + case ' ': + if(state == S0) + continue; + break; + case '-': + if(state == S0) + flag |= Fsign; + else + flag |= Fesign; + case '+': + if(state == S0) + state = S1; + else + if(state == S5) + state = S6; + else + break; /* syntax */ + continue; + case '.': + flag |= Fdpoint; + dp = na; + if(state == S0 || state == S1) { + state = S3; + continue; + } + if(state == S2) { + state = S4; + continue; + } + break; + case 'e': + case 'E': + if(state == S2 || state == S4) { + state = S5; + continue; + } + break; + } + break; + } + + /* + * clean up return char-pointer + */ + switch(state) { + case S0: + if(xcmp(s, "nan") == 0) { + if(aas != nil) + *aas = s+3; + goto retnan; + } + case S1: + if(xcmp(s, "infinity") == 0) { + if(aas != nil) + *aas = s+8; + goto retinf; + } + if(xcmp(s, "inf") == 0) { + if(aas != nil) + *aas = s+3; + 
goto retinf; + } + case S3: + if(aas != nil) + *aas = (char*)as; + goto ret0; /* no digits found */ + case S6: + s--; /* back over +- */ + case S5: + s--; /* back over e */ + break; + } + if(aas != nil) + *aas = s; + + if(flag & Fdpoint) + while(na > 0 && a[na-1] == '0') + na--; + if(na == 0) + goto ret0; /* zero */ + a[na] = 0; + if(!(flag & Fdpoint)) + dp = na; + if(flag & Fesign) + ex = -ex; + dp += ex; + if(dp < -Maxe){ + errno = ERANGE; + goto ret0; /* underflow by exp */ + } else + if(dp > +Maxe) + goto retinf; /* overflow by exp */ + + /* + * normalize the decimal ascii number + * to range .[5-9][0-9]* e0 + */ + bp = 0; /* binary exponent */ + while(dp > 0) + divascii(a, &na, &dp, &bp); + while(dp < 0 || a[0] < '5') + mulascii(a, &na, &dp, &bp); + + /* close approx by naive conversion */ + mid[0] = 0; + mid[1] = 1; + for(i=0; (c=a[i]) != '\0'; i++) { + mid[0] = mid[0]*10 + (c-'0'); + mid[1] = mid[1]*10; + if(i >= 8) + break; + } + low[0] = umuldiv(mid[0], One, mid[1]); + hig[0] = umuldiv(mid[0]+1, One, mid[1]); + for(i=1; i<Prec; i++) { + low[i] = 0; + hig[i] = One-1; + } + + /* binary search for closest mantissa */ + for(;;) { + /* mid = (hig + low) / 2 */ + c = 0; + for(i=0; i<Prec; i++) { + mid[i] = hig[i] + low[i]; + if(c) + mid[i] += One; + c = mid[i] & 1; + mid[i] >>= 1; + } + frnorm(mid); + + /* compare */ + c = fpcmp(a, mid); + if(c > 0) { + c = 1; + for(i=0; i<Prec; i++) + if(low[i] != mid[i]) { + c = 0; + low[i] = mid[i]; + } + if(c) + break; /* between mid and hig */ + continue; + } + if(c < 0) { + for(i=0; i<Prec; i++) + hig[i] = mid[i]; + continue; + } + + /* only hard part is if even/odd roundings wants to go up */ + c = mid[Prec-1] & (Sigbit-1); + if(c == Sigbit/2 && (mid[Prec-1]&Sigbit) == 0) + mid[Prec-1] -= c; + break; /* exactly mid */ + } + + /* normal rounding applies */ + c = mid[Prec-1] & (Sigbit-1); + mid[Prec-1] -= c; + if(c >= Sigbit/2) { + mid[Prec-1] += Sigbit; + frnorm(mid); + } + goto out; + +ret0: + return 0; + +retnan: + 
return __NaN(); + +retinf: + /* + * Unix strtod requires these. Plan 9 would return Inf(0) or Inf(-1). */ + errno = ERANGE; + if(flag & Fsign) + return -HUGE_VAL; + return HUGE_VAL; + +out: + d = 0; + for(i=0; i<Prec; i++) + d = d*One + mid[i]; + if(flag & Fsign) + d = -d; + d = ldexp(d, bp - Prec*Nbits); + if(d == 0){ /* underflow */ + errno = ERANGE; + } + return d; +} + +static void +frnorm(ulong *f) +{ + int i, c; + + c = 0; + for(i=Prec-1; i>0; i--) { + f[i] += c; + c = f[i] >> Nbits; + f[i] &= One-1; + } + f[0] += c; +} + +static int +fpcmp(char *a, ulong* f) +{ + ulong tf[Prec]; + int i, d, c; + + for(i=0; i<Prec; i++) + tf[i] = f[i]; + + for(;;) { + /* tf *= 10 */ + for(i=0; i<Prec; i++) + tf[i] = tf[i]*10; + frnorm(tf); + d = (tf[0] >> Nbits) + '0'; + tf[0] &= One-1; + + /* compare next digit */ + c = *a; + if(c == 0) { + if('0' < d) + return -1; + if(tf[0] != 0) + goto cont; + for(i=1; i<Prec; i++) + if(tf[i] != 0) + goto cont; + return 0; + } + if(c > d) + return +1; + if(c < d) + return -1; + a++; + cont:; + } +} + +static void +divby(char *a, int *na, int b) +{ + int n, c; + char *p; + + p = a; + n = 0; + while(n>>b == 0) { + c = *a++; + if(c == 0) { + while(n) { + c = n*10; + if(c>>b) + break; + n = c; + } + goto xx; + } + n = n*10 + c-'0'; + (*na)--; + } + for(;;) { + c = n>>b; + n -= c<<b; + *p++ = c + '0'; + c = *a++; + if(c == 0) + break; + n = n*10 + c-'0'; + } + (*na)++; +xx: + while(n) { + n = n*10; + c = n>>b; + n -= c<<b; + *p++ = c + '0'; + (*na)++; + } + *p = 0; +} + +static Tab tab1[] = +{ + 1, 0, "", + 3, 1, "7", + 6, 2, "63", + 9, 3, "511", + 13, 4, "8191", + 16, 5, "65535", + 19, 6, "524287", + 23, 7, "8388607", + 26, 8, "67108863", + 27, 9, "134217727", +}; + +static void +divascii(char *a, int *na, int *dp, int *bp) +{ + int b, d; + Tab *t; + + d = *dp; + if(d >= (int)(nelem(tab1))) + d = (int)(nelem(tab1))-1; + t = tab1 + d; + b = t->bp; + if(memcmp(a, t->cmp, t->siz) > 0) + d--; + *dp -= d; + *bp += b; + divby(a, na, b); +} + 
+static void +mulby(char *a, char *p, char *q, int b) +{ + int n, c; + + n = 0; + *p = 0; + for(;;) { + q--; + if(q < a) + break; + c = *q - '0'; + c = (c<<b) + n; + n = c/10; + c -= n*10; + p--; + *p = c + '0'; + } + while(n) { + c = n; + n = c/10; + c -= n*10; + p--; + *p = c + '0'; + } +} + +static Tab tab2[] = +{ + 1, 1, "", /* dp = 0-0 */ + 3, 3, "125", + 6, 5, "15625", + 9, 7, "1953125", + 13, 10, "1220703125", + 16, 12, "152587890625", + 19, 14, "19073486328125", + 23, 17, "11920928955078125", + 26, 19, "1490116119384765625", + 27, 19, "7450580596923828125", /* dp 8-9 */ +}; + +static void +mulascii(char *a, int *na, int *dp, int *bp) +{ + char *p; + int d, b; + Tab *t; + + d = -*dp; + if(d >= (int)(nelem(tab2))) + d = (int)(nelem(tab2))-1; + t = tab2 + d; + b = t->bp; + if(memcmp(a, t->cmp, t->siz) < 0) + d--; + p = a + *na; + *bp -= b; + *dp += d; + *na += d; + mulby(a, p+d, p, b); +} + +static int +xcmp(char *a, char *b) +{ + int c1, c2; + + while((c1 = *b++) != '\0') { + c2 = *a++; + if(isupper(c2)) + c2 = tolower(c2); + if(c1 != c2) + return 1; + } + return 0; +} diff --git a/mk/libfmt/test.c b/mk/libfmt/test.c @@ -0,0 +1,53 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +/* Copyright (c) 2004 Google Inc.; see LICENSE */ + +#include <stdio.h> +#include <stdarg.h> +#include <utf.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +main(int argc, char *argv[]) +{ + quotefmtinstall(); + print("hello world\n"); + print("x: %x\n", 0x87654321); + print("u: %u\n", 0x87654321); + print("d: %d\n", 0x87654321); + print("s: %s\n", "hi there"); + print("q: %q\n", "hi i'm here"); + print("c: %c\n", '!'); + print("g: %g %g %g\n", 3.14159, 3.14159e10, 3.14159e-10); + print("e: %e %e %e\n", 3.14159, 3.14159e10, 3.14159e-10); + print("f: %f %f %f\n", 3.14159, 3.14159e10, 3.14159e-10); + print("smiley: %C\n", (Rune)0x263a); + print("%g %.18g\n", 2e25, 2e25); + print("%2.18g\n", 1.0); + print("%2.18f\n", 1.0); + print("%f\n", 
3.1415927/4); + print("%d\n", 23); + print("%i\n", 23); + print("%0.10d\n", 12345); + + /* test %4$d formats */ + print("%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222); + print("%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222); + print("%3$d %4$*5$06d %2$d %1$d\n", 444, 333, 111, 222, 20); + print("%3$hd %4$*5$06d %2$d %1$d\n", 444, 333, (short)111, 222, 20); + print("%3$lld %4$*5$06d %2$d %1$d\n", 444, 333, 111LL, 222, 20); + + /* test %'d formats */ + print("%'d %'d %'d\n", 1, 2222, 33333333); + print("%'019d\n", 0); + print("%08d %08d %08d\n", 1, 2222, 33333333); + print("%'08d %'08d %'08d\n", 1, 2222, 33333333); + print("%'x %'X %'b\n", 0x11111111, 0xabcd1234, 12345); + print("%'lld %'lld %'lld\n", 1LL, 222222222LL, 3333333333333LL); + print("%019lld %019lld %019lld\n", 1LL, 222222222LL, 3333333333333LL); + print("%'019lld %'019lld %'019lld\n", 1LL, 222222222LL, 3333333333333LL); + print("%'020lld %'020lld %'020lld\n", 1LL, 222222222LL, 3333333333333LL); + print("%'llx %'llX %'llb\n", 0x111111111111LL, 0xabcd12345678LL, 112342345LL); + return 0; +} diff --git a/mk/libfmt/test2.c b/mk/libfmt/test2.c @@ -0,0 +1,9 @@ +#include <stdarg.h> +#include <utf.h> +#include <fmt.h> + +int +main(int argc, char **argv) +{ + print("%020.10d\n", 100); +} diff --git a/mk/libfmt/test3.c b/mk/libfmt/test3.c @@ -0,0 +1,52 @@ +#include <u.h> +#include <libc.h> +#include <stdio.h> + +void +test(char *fmt, ...) 
+{ + va_list arg; + char fmtbuf[100], stdbuf[100]; + + va_start(arg, fmt); + vsnprint(fmtbuf, sizeof fmtbuf, fmt, arg); + va_end(arg); + + va_start(arg, fmt); + vsnprint(stdbuf, sizeof stdbuf, fmt, arg); + va_end(arg); + + if(strcmp(fmtbuf, stdbuf) != 0) + print("fmt %s: fmt=\"%s\" std=\"%s\"\n", fmt, fmtbuf, stdbuf); + + print("fmt %s: %s\n", fmt, fmtbuf); +} + + +int +main(int argc, char *argv[]) +{ + test("%f", 3.14159); + test("%f", 3.14159e10); + test("%f", 3.14159e-10); + + test("%e", 3.14159); + test("%e", 3.14159e10); + test("%e", 3.14159e-10); + + test("%g", 3.14159); + test("%g", 3.14159e10); + test("%g", 3.14159e-10); + + test("%g", 2e25); + test("%.18g", 2e25); + + test("%2.18g", 1.0); + test("%2.18f", 1.0); + test("%f", 3.1415927/4); + + test("%20.10d", 12345); + test("%0.10d", 12345); + + return 0; +} diff --git a/mk/libfmt/vfprint.c b/mk/libfmt/vfprint.c @@ -0,0 +1,21 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +vfprint(int fd, char *fmt, va_list args) +{ + Fmt f; + char buf[256]; + int n; + + fmtfdinit(&f, fd, buf, sizeof(buf)); + VA_COPY(f.args,args); + n = dofmt(&f, fmt); + VA_END(f.args); + if(n > 0 && __fmtFdFlush(&f) == 0) + return -1; + return n; +} diff --git a/mk/libfmt/vseprint.c b/mk/libfmt/vseprint.c @@ -0,0 +1,28 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +char* +vseprint(char *buf, char *e, char *fmt, va_list args) +{ + Fmt f; + + if(e <= buf) + return nil; + f.runes = 0; + f.start = buf; + f.to = buf; + f.stop = e - 1; + f.flush = 0; + f.farg = nil; + f.nfmt = 0; + VA_COPY(f.args,args); + fmtlocaleinit(&f, nil, nil, nil); + dofmt(&f, fmt); + VA_END(f.args); + *(char*)f.to = '\0'; + return (char*)f.to; +} + diff --git a/mk/libfmt/vsmprint.c b/mk/libfmt/vsmprint.c @@ -0,0 +1,83 @@ +/* Copyright (c) 2002-2006 Lucent 
Technologies; see LICENSE */ +/* + * Plan 9 port version must include libc.h in order to + * get Plan 9 debugging malloc, which sometimes returns + * different pointers than the standard malloc. + */ +#ifdef PLAN9PORT +#include <u.h> +#include <libc.h> +#include "fmtdef.h" +#else +#include <stdlib.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" +#endif + +static int +fmtStrFlush(Fmt *f) +{ + char *s; + int n; + + if(f->start == nil) + return 0; + n = (uintptr)f->farg; + n *= 2; + s = (char*)f->start; + f->start = realloc(s, n); + if(f->start == nil){ + f->farg = nil; + f->to = nil; + f->stop = nil; + free(s); + return 0; + } + f->farg = (void*)(uintptr)n; + f->to = (char*)f->start + ((char*)f->to - s); + f->stop = (char*)f->start + n - 1; + return 1; +} + +int +fmtstrinit(Fmt *f) +{ + int n; + + memset(f, 0, sizeof *f); + f->runes = 0; + n = 32; + f->start = malloc(n); + if(f->start == nil) + return -1; + f->to = f->start; + f->stop = (char*)f->start + n - 1; + f->flush = fmtStrFlush; + f->farg = (void*)(uintptr)n; + f->nfmt = 0; + fmtlocaleinit(f, nil, nil, nil); + return 0; +} + +/* + * print into an allocated string buffer + */ +char* +vsmprint(char *fmt, va_list args) +{ + Fmt f; + int n; + + if(fmtstrinit(&f) < 0) + return nil; + VA_COPY(f.args,args); + n = dofmt(&f, fmt); + VA_END(f.args); + if(n < 0){ + free(f.start); + return nil; + } + return fmtstrflush(&f); +} diff --git a/mk/libfmt/vsnprint.c b/mk/libfmt/vsnprint.c @@ -0,0 +1,28 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdlib.h> +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +vsnprint(char *buf, int len, char *fmt, va_list args) +{ + Fmt f; + + if(len <= 0) + return -1; + f.runes = 0; + f.start = buf; + f.to = buf; + f.stop = buf + len - 1; + f.flush = 0; + f.farg = nil; + f.nfmt = 0; + VA_COPY(f.args,args); + fmtlocaleinit(&f, nil, nil, nil); + dofmt(&f, fmt); + VA_END(f.args); + *(char*)f.to 
= '\0'; + return (char*)f.to - buf; +} diff --git a/mk/libregexp/NOTICE b/mk/libregexp/NOTICE @@ -0,0 +1,25 @@ +/* + * The authors of this software is Rob Pike. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. +*/ + +This is a Unix port of the Plan 9 regular expression library. + +Please send comments about the packaging +to Russ Cox <rsc@swtch.com>. + + +---- + +This software is also made available under the Lucent Public License +version 1.02; see http://plan9.bell-labs.com/plan9dist/license.html + diff --git a/mk/libregexp/README b/mk/libregexp/README @@ -0,0 +1,5 @@ +This software was packaged for Unix by Russ Cox. +Please send comments to rsc@swtch.com. + +http://swtch.com/plan9port/unix + diff --git a/mk/libregexp/depsinc.mk b/mk/libregexp/depsinc.mk @@ -0,0 +1,2 @@ +DEPS_CFLAGS = $DEPS_CFLAGS -I$libregexp_DEPDIR +DEPS_LDFLAGS = $DEPS_LDFLAGS -L$libregexp_DEPDIR -lregexp9 diff --git a/mk/libregexp/lib9.h b/mk/libregexp/lib9.h @@ -0,0 +1,10 @@ +#include <fmt.h> +#include <setjmp.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + +#define exits(x) exit(x && *x ? 
1 : 0) + +#define nil 0 + diff --git a/mk/libregexp/mkfile b/mk/libregexp/mkfile @@ -0,0 +1,15 @@ +LIB = libregexp9.a +LOBJ = \ + regcomp.o\ + regerror.o\ + regexec.o\ + regsub.o\ + regaux.o\ + rregexec.o\ + rregsub.o\ + +LOCAL_CFLAGS = -I"$PREFIX"/include +DEPS = libfmt libutf + +<$mkbuild/mk.default + diff --git a/mk/libregexp/regaux.c b/mk/libregexp/regaux.c @@ -0,0 +1,112 @@ +#include "lib9.h" +#include "regexp9.h" +#include "regcomp.h" + + +/* + * save a new match in mp + */ +extern void +_renewmatch(Resub *mp, int ms, Resublist *sp) +{ + int i; + + if(mp==0 || ms<=0) + return; + if(mp[0].s.sp==0 || sp->m[0].s.sp<mp[0].s.sp || + (sp->m[0].s.sp==mp[0].s.sp && sp->m[0].e.ep>mp[0].e.ep)){ + for(i=0; i<ms && i<NSUBEXP; i++) + mp[i] = sp->m[i]; + for(; i<ms; i++) + mp[i].s.sp = mp[i].e.ep = 0; + } +} + +/* + * Note optimization in _renewthread: + * *lp must be pending when _renewthread called; if *l has been looked + * at already, the optimization is a bug. + */ +extern Relist* +_renewthread(Relist *lp, /* _relist to add to */ + Reinst *ip, /* instruction to add */ + int ms, + Resublist *sep) /* pointers to subexpressions */ +{ + Relist *p; + + for(p=lp; p->inst; p++){ + if(p->inst == ip){ + if(sep->m[0].s.sp < p->se.m[0].s.sp){ + if(ms > 1) + p->se = *sep; + else + p->se.m[0] = sep->m[0]; + } + return 0; + } + } + p->inst = ip; + if(ms > 1) + p->se = *sep; + else + p->se.m[0] = sep->m[0]; + (++p)->inst = 0; + return p; +} + +/* + * same as renewthread, but called with + * initial empty start pointer. 
+ */ +extern Relist* +_renewemptythread(Relist *lp, /* _relist to add to */ + Reinst *ip, /* instruction to add */ + int ms, + char *sp) /* pointers to subexpressions */ +{ + Relist *p; + + for(p=lp; p->inst; p++){ + if(p->inst == ip){ + if(sp < p->se.m[0].s.sp) { + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.sp = sp; + } + return 0; + } + } + p->inst = ip; + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.sp = sp; + (++p)->inst = 0; + return p; +} + +extern Relist* +_rrenewemptythread(Relist *lp, /* _relist to add to */ + Reinst *ip, /* instruction to add */ + int ms, + Rune *rsp) /* pointers to subexpressions */ +{ + Relist *p; + + for(p=lp; p->inst; p++){ + if(p->inst == ip){ + if(rsp < p->se.m[0].s.rsp) { + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.rsp = rsp; + } + return 0; + } + } + p->inst = ip; + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.rsp = rsp; + (++p)->inst = 0; + return p; +} diff --git a/mk/libregexp/regcomp.c b/mk/libregexp/regcomp.c @@ -0,0 +1,555 @@ +#include "lib9.h" +#include "regexp9.h" +#include "regcomp.h" + +#define TRUE 1 +#define FALSE 0 + +/* + * Parser Information + */ +typedef +struct Node +{ + Reinst* first; + Reinst* last; +}Node; + +#define NSTACK 20 +static Node andstack[NSTACK]; +static Node *andp; +static int atorstack[NSTACK]; +static int* atorp; +static int cursubid; /* id of current subexpression */ +static int subidstack[NSTACK]; /* parallel to atorstack */ +static int* subidp; +static int lastwasand; /* Last token was operand */ +static int nbra; +static char* exprp; /* pointer to next character in source expression */ +static int lexdone; +static int nclass; +static Reclass*classp; +static Reinst* freep; +static int errors; +static Rune yyrune; /* last lex'd rune */ +static Reclass*yyclassp; /* last lex'd class */ + +/* predeclared crap */ +static void operator(int); +static void pushand(Reinst*, Reinst*); +static void pushator(int); +static void 
evaluntil(int); +static int bldcclass(void); + +static jmp_buf regkaboom; + +static void +rcerror(char *s) +{ + errors++; + regerror(s); + longjmp(regkaboom, 1); +} + +static Reinst* +newinst(int t) +{ + freep->type = t; + freep->u2.left = 0; + freep->u1.right = 0; + return freep++; +} + +static void +operand(int t) +{ + Reinst *i; + + if(lastwasand) + operator(CAT); /* catenate is implicit */ + i = newinst(t); + + if(t == CCLASS || t == NCCLASS) + i->u1.cp = yyclassp; + if(t == RUNE) + i->u1.r = yyrune; + + pushand(i, i); + lastwasand = TRUE; +} + +static void +operator(int t) +{ + if(t==RBRA && --nbra<0) + rcerror("unmatched right paren"); + if(t==LBRA){ + if(++cursubid >= NSUBEXP) + rcerror ("too many subexpressions"); + nbra++; + if(lastwasand) + operator(CAT); + } else + evaluntil(t); + if(t != RBRA) + pushator(t); + lastwasand = FALSE; + if(t==STAR || t==QUEST || t==PLUS || t==RBRA) + lastwasand = TRUE; /* these look like operands */ +} + +static void +regerr2(char *s, int c) +{ + char buf[100]; + char *cp = buf; + while(*s) + *cp++ = *s++; + *cp++ = c; + *cp = '\0'; + rcerror(buf); +} + +static void +cant(char *s) +{ + char buf[100]; + strcpy(buf, "can't happen: "); + strcat(buf, s); + rcerror(buf); +} + +static void +pushand(Reinst *f, Reinst *l) +{ + if(andp >= &andstack[NSTACK]) + cant("operand stack overflow"); + andp->first = f; + andp->last = l; + andp++; +} + +static void +pushator(int t) +{ + if(atorp >= &atorstack[NSTACK]) + cant("operator stack overflow"); + *atorp++ = t; + *subidp++ = cursubid; +} + +static Node* +popand(int op) +{ + Reinst *inst; + + if(andp <= &andstack[0]){ + regerr2("missing operand for ", op); + inst = newinst(NOP); + pushand(inst,inst); + } + return --andp; +} + +static int +popator(void) +{ + if(atorp <= &atorstack[0]) + cant("operator stack underflow"); + --subidp; + return *--atorp; +} + +static void +evaluntil(int pri) +{ + Node *op1, *op2; + Reinst *inst1, *inst2; + + while(pri==RBRA || atorp[-1]>=pri){ + 
switch(popator()){ + default: + rcerror("unknown operator in evaluntil"); + break; + case LBRA: /* must have been RBRA */ + op1 = popand('('); + inst2 = newinst(RBRA); + inst2->u1.subid = *subidp; + op1->last->u2.next = inst2; + inst1 = newinst(LBRA); + inst1->u1.subid = *subidp; + inst1->u2.next = op1->first; + pushand(inst1, inst2); + return; + case OR: + op2 = popand('|'); + op1 = popand('|'); + inst2 = newinst(NOP); + op2->last->u2.next = inst2; + op1->last->u2.next = inst2; + inst1 = newinst(OR); + inst1->u1.right = op1->first; + inst1->u2.left = op2->first; + pushand(inst1, inst2); + break; + case CAT: + op2 = popand(0); + op1 = popand(0); + op1->last->u2.next = op2->first; + pushand(op1->first, op2->last); + break; + case STAR: + op2 = popand('*'); + inst1 = newinst(OR); + op2->last->u2.next = inst1; + inst1->u1.right = op2->first; + pushand(inst1, inst1); + break; + case PLUS: + op2 = popand('+'); + inst1 = newinst(OR); + op2->last->u2.next = inst1; + inst1->u1.right = op2->first; + pushand(op2->first, inst1); + break; + case QUEST: + op2 = popand('?'); + inst1 = newinst(OR); + inst2 = newinst(NOP); + inst1->u2.left = inst2; + inst1->u1.right = op2->first; + op2->last->u2.next = inst2; + pushand(inst1, inst2); + break; + } + } +} + +static Reprog* +optimize(Reprog *pp) +{ + Reinst *inst, *target; + int size; + Reprog *npp; + Reclass *cl; + int diff; + + /* + * get rid of NOOP chains + */ + for(inst=pp->firstinst; inst->type!=END; inst++){ + target = inst->u2.next; + while(target->type == NOP) + target = target->u2.next; + inst->u2.next = target; + } + + /* + * The original allocation is for an area larger than + * necessary. Reallocate to the actual space used + * and then relocate the code. 
+ */ + size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst); + npp = realloc(pp, size); + if(npp==0 || npp==pp) + return pp; + diff = (char *)npp - (char *)pp; + freep = (Reinst *)((char *)freep + diff); + for(inst=npp->firstinst; inst<freep; inst++){ + switch(inst->type){ + case OR: + case STAR: + case PLUS: + case QUEST: + inst->u1.right = (void*)((char*)inst->u1.right + diff); + break; + case CCLASS: + case NCCLASS: + inst->u1.right = (void*)((char*)inst->u1.right + diff); + cl = inst->u1.cp; + cl->end = (void*)((char*)cl->end + diff); + break; + } + inst->u2.left = (void*)((char*)inst->u2.left + diff); + } + npp->startinst = (void*)((char*)npp->startinst + diff); + return npp; +} + +#ifdef DEBUG +static void +dumpstack(void){ + Node *stk; + int *ip; + + print("operators\n"); + for(ip=atorstack; ip<atorp; ip++) + print("0%o\n", *ip); + print("operands\n"); + for(stk=andstack; stk<andp; stk++) + print("0%o\t0%o\n", stk->first->type, stk->last->type); +} + +static void +dump(Reprog *pp) +{ + Reinst *l; + Rune *p; + + l = pp->firstinst; + do{ + print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type, + l->u2.left-pp->firstinst, l->u1.right-pp->firstinst); + if(l->type == RUNE) + print("\t%C\n", l->u1.r); + else if(l->type == CCLASS || l->type == NCCLASS){ + print("\t["); + if(l->type == NCCLASS) + print("^"); + for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2) + if(p[0] == p[1]) + print("%C", p[0]); + else + print("%C-%C", p[0], p[1]); + print("]\n"); + } else + print("\n"); + }while(l++->type); +} +#endif + +static Reclass* +newclass(void) +{ + if(nclass >= NCLASS) + regerr2("too many character classes; limit", NCLASS+'0'); + return &(classp[nclass++]); +} + +static int +nextc(Rune *rp) +{ + if(lexdone){ + *rp = 0; + return 1; + } + exprp += chartorune(rp, exprp); + if(*rp == '\\'){ + exprp += chartorune(rp, exprp); + return 1; + } + if(*rp == 0) + lexdone = 1; + return 0; +} + +static int +lex(int literal, int dot_type) +{ + int quoted; + + quoted = 
nextc(&yyrune); + if(literal || quoted){ + if(yyrune == 0) + return END; + return RUNE; + } + + switch(yyrune){ + case 0: + return END; + case '*': + return STAR; + case '?': + return QUEST; + case '+': + return PLUS; + case '|': + return OR; + case '.': + return dot_type; + case '(': + return LBRA; + case ')': + return RBRA; + case '^': + return BOL; + case '$': + return EOL; + case '[': + return bldcclass(); + } + return RUNE; +} + +static int +bldcclass(void) +{ + int type; + Rune r[NCCRUNE]; + Rune *p, *ep, *np; + Rune rune; + int quoted; + + /* we have already seen the '[' */ + type = CCLASS; + yyclassp = newclass(); + + /* look ahead for negation */ + /* SPECIAL CASE!!! negated classes don't match \n */ + ep = r; + quoted = nextc(&rune); + if(!quoted && rune == '^'){ + type = NCCLASS; + quoted = nextc(&rune); + *ep++ = '\n'; + *ep++ = '\n'; + } + + /* parse class into a set of spans */ + for(; ep<&r[NCCRUNE];){ + if(rune == 0){ + rcerror("malformed '[]'"); + return 0; + } + if(!quoted && rune == ']') + break; + if(!quoted && rune == '-'){ + if(ep == r){ + rcerror("malformed '[]'"); + return 0; + } + quoted = nextc(&rune); + if((!quoted && rune == ']') || rune == 0){ + rcerror("malformed '[]'"); + return 0; + } + *(ep-1) = rune; + } else { + *ep++ = rune; + *ep++ = rune; + } + quoted = nextc(&rune); + } + + /* sort on span start */ + for(p = r; p < ep; p += 2){ + for(np = p; np < ep; np += 2) + if(*np < *p){ + rune = np[0]; + np[0] = p[0]; + p[0] = rune; + rune = np[1]; + np[1] = p[1]; + p[1] = rune; + } + } + + /* merge spans */ + np = yyclassp->spans; + p = r; + if(r == ep) + yyclassp->end = np; + else { + np[0] = *p++; + np[1] = *p++; + for(; p < ep; p += 2) + if(p[0] <= np[1]){ + if(p[1] > np[1]) + np[1] = p[1]; + } else { + np += 2; + np[0] = p[0]; + np[1] = p[1]; + } + yyclassp->end = np+2; + } + + return type; +} + +static Reprog* +regcomp1(char *s, int literal, int dot_type) +{ + int token; + Reprog *volatile pp; + + /* get memory for the program */ + 
pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s)); + if(pp == 0){ + regerror("out of memory"); + return 0; + } + freep = pp->firstinst; + classp = pp->class; + errors = 0; + + if(setjmp(regkaboom)) + goto out; + + /* go compile the sucker */ + lexdone = 0; + exprp = s; + nclass = 0; + nbra = 0; + atorp = atorstack; + andp = andstack; + subidp = subidstack; + lastwasand = FALSE; + cursubid = 0; + + /* Start with a low priority operator to prime parser */ + pushator(START-1); + while((token = lex(literal, dot_type)) != END){ + if((token&0300) == OPERATOR) + operator(token); + else + operand(token); + } + + /* Close with a low priority operator */ + evaluntil(START); + + /* Force END */ + operand(END); + evaluntil(START); +#ifdef DEBUG + dumpstack(); +#endif + if(nbra) + rcerror("unmatched left paren"); + --andp; /* points to first and only operand */ + pp->startinst = andp->first; +#ifdef DEBUG + dump(pp); +#endif + pp = optimize(pp); +#ifdef DEBUG + print("start: %d\n", andp->first-pp->firstinst); + dump(pp); +#endif +out: + if(errors){ + free(pp); + pp = 0; + } + return pp; +} + +extern Reprog* +regcomp(char *s) +{ + return regcomp1(s, 0, ANY); +} + +extern Reprog* +regcomplit(char *s) +{ + return regcomp1(s, 1, ANY); +} + +extern Reprog* +regcompnl(char *s) +{ + return regcomp1(s, 0, ANYNL); +} diff --git a/mk/libregexp/regcomp.h b/mk/libregexp/regcomp.h @@ -0,0 +1,74 @@ +/* + * substitution list + */ +#define uchar __reuchar +typedef unsigned char uchar; +#define nelem(x) (sizeof(x)/sizeof((x)[0])) + +#define NSUBEXP 32 +typedef struct Resublist Resublist; +struct Resublist +{ + Resub m[NSUBEXP]; +}; + +/* max character classes per program */ +extern Reprog RePrOg; +#define NCLASS (sizeof(RePrOg.class)/sizeof(Reclass)) + +/* max rune ranges per character class */ +#define NCCRUNE (sizeof(Reclass)/sizeof(Rune)) + +/* + * Actions and Tokens (Reinst types) + * + * 02xx are operators, value == precedence + * 03xx are tokens, i.e. 
operands for operators + */ +#define RUNE 0177 +#define OPERATOR 0200 /* Bitmask of all operators */ +#define START 0200 /* Start, used for marker on stack */ +#define RBRA 0201 /* Right bracket, ) */ +#define LBRA 0202 /* Left bracket, ( */ +#define OR 0203 /* Alternation, | */ +#define CAT 0204 /* Concatentation, implicit operator */ +#define STAR 0205 /* Closure, * */ +#define PLUS 0206 /* a+ == aa* */ +#define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */ +#define ANY 0300 /* Any character except newline, . */ +#define ANYNL 0301 /* Any character including newline, . */ +#define NOP 0302 /* No operation, internal use only */ +#define BOL 0303 /* Beginning of line, ^ */ +#define EOL 0304 /* End of line, $ */ +#define CCLASS 0305 /* Character class, [] */ +#define NCCLASS 0306 /* Negated character class, [] */ +#define END 0377 /* Terminate: match found */ + +/* + * regexec execution lists + */ +#define LISTSIZE 10 +#define BIGLISTSIZE (10*LISTSIZE) +typedef struct Relist Relist; +struct Relist +{ + Reinst* inst; /* Reinstruction of the thread */ + Resublist se; /* matched subexpressions in this thread */ +}; +typedef struct Reljunk Reljunk; +struct Reljunk +{ + Relist* relist[2]; + Relist* reliste[2]; + int starttype; + Rune startchar; + char* starts; + char* eol; + Rune* rstarts; + Rune* reol; +}; + +extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*); +extern void _renewmatch(Resub*, int, Resublist*); +extern Relist* _renewemptythread(Relist*, Reinst*, int, char*); +extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*); diff --git a/mk/libregexp/regerror.c b/mk/libregexp/regerror.c @@ -0,0 +1,14 @@ +#include "lib9.h" +#include "regexp9.h" + +void +regerror(char *s) +{ + char buf[132]; + + strcpy(buf, "regerror: "); + strcat(buf, s); + strcat(buf, "\n"); + write(2, buf, strlen(buf)); + exits("regerr"); +} diff --git a/mk/libregexp/regexec.c b/mk/libregexp/regexec.c @@ -0,0 +1,231 @@ +#include "lib9.h" +#include "regexp9.h" +#include 
"regcomp.h" + + +/* + * return 0 if no match + * >0 if a match + * <0 if we ran out of _relist space + */ +static int +regexec1(Reprog *progp, /* program to run */ + char *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + Reljunk *j +) +{ + int flag=0; + Reinst *inst; + Relist *tlp; + char *s; + int i, checkstart; + Rune r, *rp, *ep; + int n; + Relist* tl; /* This list, next list */ + Relist* nl; + Relist* tle; /* ends of this and next list */ + Relist* nle; + int match; + char *p; + + match = 0; + checkstart = j->starttype; + if(mp) + for(i=0; i<ms; i++) { + mp[i].s.sp = 0; + mp[i].e.ep = 0; + } + j->relist[0][0].inst = 0; + j->relist[1][0].inst = 0; + + /* Execute machine once for each character, including terminal NUL */ + s = j->starts; + do{ + /* fast check for first char */ + if(checkstart) { + switch(j->starttype) { + case RUNE: + p = utfrune(s, j->startchar); + if(p == 0 || s == j->eol) + return match; + s = p; + break; + case BOL: + if(s == bol) + break; + p = utfrune(s, '\n'); + if(p == 0 || s == j->eol) + return match; + s = p+1; + break; + } + } + r = *(uchar*)s; + if(r < Runeself) + n = 1; + else + n = chartorune(&r, s); + + /* switch run lists */ + tl = j->relist[flag]; + tle = j->reliste[flag]; + nl = j->relist[flag^=1]; + nle = j->reliste[flag]; + nl->inst = 0; + + /* Add first instruction to current list */ + if(match == 0) + _renewemptythread(tl, progp->startinst, ms, s); + + /* Execute machine until current list is empty */ + for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ + for(inst = tlp->inst; ; inst = inst->u2.next){ + switch(inst->type){ + case RUNE: /* regular character */ + if(inst->u1.r == r){ + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + } + break; + case LBRA: + tlp->se.m[inst->u1.subid].s.sp = s; + continue; + case RBRA: + tlp->se.m[inst->u1.subid].e.ep = s; + continue; + case ANY: + if(r != '\n') + if(_renewthread(nl, inst->u2.next, ms, 
&tlp->se)==nle) + return -1; + break; + case ANYNL: + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + case BOL: + if(s == bol || *(s-1) == '\n') + continue; + break; + case EOL: + if(s == j->eol || r == 0 || r == '\n') + continue; + break; + case CCLASS: + ep = inst->u1.cp->end; + for(rp = inst->u1.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]){ + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + } + break; + case NCCLASS: + ep = inst->u1.cp->end; + for(rp = inst->u1.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]) + break; + if(rp == ep) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + case OR: + /* evaluate right choice later */ + if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle) + return -1; + /* efficiency: advance and re-evaluate */ + continue; + case END: /* Match! */ + match = 1; + tlp->se.m[0].e.ep = s; + if(mp != 0) + _renewmatch(mp, ms, &tlp->se); + break; + } + break; + } + } + if(s == j->eol) + break; + checkstart = j->starttype && nl->inst==0; + s += n; + }while(r); + return match; +} + +static int +regexec2(Reprog *progp, /* program to run */ + char *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + Reljunk *j +) +{ + int rv; + Relist *relist0, *relist1; + + /* mark space: rerun regexec1 with heap-allocated lists of BIGLISTSIZE entries; -1 if they cannot be allocated */ + relist0 = malloc(BIGLISTSIZE*sizeof(Relist)); + if(relist0 == nil) + return -1; + relist1 = malloc(BIGLISTSIZE*sizeof(Relist)); + if(relist1 == nil){ + free(relist0); /* relist1 is nil here; release the list already allocated */ + return -1; + } + j->relist[0] = relist0; + j->relist[1] = relist1; + j->reliste[0] = relist0 + BIGLISTSIZE - 2; + j->reliste[1] = relist1 + BIGLISTSIZE - 2; + + rv = regexec1(progp, bol, mp, ms, j); + free(relist0); + free(relist1); + return rv; +} + +extern int +regexec(Reprog *progp, /* program to run */ + char *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms) /* number of 
elements at mp */ +{ + Reljunk j; + Relist relist0[LISTSIZE], relist1[LISTSIZE]; + int rv; + + /* + * use user-specified starting/ending location if specified + */ + j.starts = bol; + j.eol = 0; + if(mp && ms>0){ + if(mp->s.sp) + j.starts = mp->s.sp; + if(mp->e.ep) + j.eol = mp->e.ep; + } + j.starttype = 0; + j.startchar = 0; + if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) { + j.starttype = RUNE; + j.startchar = progp->startinst->u1.r; + } + if(progp->startinst->type == BOL) + j.starttype = BOL; + + /* mark space */ + j.relist[0] = relist0; + j.relist[1] = relist1; + j.reliste[0] = relist0 + nelem(relist0) - 2; + j.reliste[1] = relist1 + nelem(relist1) - 2; + + rv = regexec1(progp, bol, mp, ms, &j); + if(rv >= 0) + return rv; + rv = regexec2(progp, bol, mp, ms, &j); + if(rv >= 0) + return rv; + return -1; +} diff --git a/mk/libregexp/regexp9.3 b/mk/libregexp/regexp9.3 @@ -0,0 +1,220 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH REGEXP9 3 +.SH NAME +regcomp, regcomplit, regcompnl, regexec, regsub, rregexec, rregsub, regerror \- regular expression +.SH SYNOPSIS +.B #include <utf.h> +.br +.B #include <fmt.h> +.br +.B #include <regexp9.h> +.PP +.ta \w'\fLRegprog 'u +.B +Reprog *regcomp(char *exp) +.PP +.B +Reprog *regcomplit(char *exp) +.PP +.B +Reprog *regcompnl(char *exp) +.PP +.nf +.B +int regexec(Reprog *prog, char *string, Resub *match, int msize) +.PP +.nf +.B +void regsub(char *source, char *dest, int dlen, Resub *match, int msize) +.PP +.nf +.B +int rregexec(Reprog *prog, Rune *string, Resub *match, int msize) +.PP +.nf +.B +void rregsub(Rune *source, Rune *dest, int dlen, Resub *match, int msize) +.PP +.B +void regerror(char *msg) +.SH DESCRIPTION +.I Regcomp +compiles a +regular expression and returns +a pointer to the generated description. +The space is allocated by +.IR malloc (3) +and may be released by +.IR free . +Regular expressions are exactly as in +.IR regexp9 (7). 
+.PP +.I Regcomplit +is like +.I regcomp +except that all characters are treated literally. +.I Regcompnl +is like +.I regcomp +except that the +.B . +metacharacter matches all characters, including newlines. +.PP +.I Regexec +matches a null-terminated +.I string +against the compiled regular expression in +.IR prog . +If it matches, +.I regexec +returns +.B 1 +and fills in the array +.I match +with character pointers to the substrings of +.I string +that correspond to the +parenthesized subexpressions of +.IR exp : +.BI match[ i ].s.sp +points to the beginning and +.BI match[ i ].e.ep +points just beyond +the end of the +.IR i th +substring. +(Subexpression +.I i +begins at the +.IR i th +left parenthesis, counting from 1.) +Pointers in +.B match[0] +pick out the substring that corresponds to +the whole regular expression. +Unused elements of +.I match +are filled with zeros. +Matches involving +.LR * , +.LR + , +and +.L ? +are extended as far as possible. +The number of array elements in +.I match +is given by +.IR msize . +The structure of elements of +.I match +is: +.IP +.EX +typedef struct { + union { + char *sp; + Rune *rsp; + } s; + union { + char *ep; + Rune *rep; + } e; +} Resub; +.EE +.LP +If +.B match[0].s.sp +is nonzero on entry, +.I regexec +starts matching at that point within +.IR string . +If +.B match[0].e.ep +is nonzero on entry, +the last character matched is the one +preceding that point. +.PP +.I Regsub +places in +.I dest +a substitution instance of +.I source +in the context of the last +.I regexec +performed using +.IR match . +Each instance of +.BI \e n\f1, +where +.I n +is a digit, is replaced by the +string delimited by +.BI match[ n ].s.sp +and +.BI match[ n ].e.ep\f1. +Each instance of +.L & +is replaced by the string delimited by +.B match[0].s.sp +and +.BR match[0].e.ep . +The substitution will always be null terminated and +trimmed to fit into dlen bytes. 
+.PP +.IR Regerror , +called whenever an error is detected in +.IR regcomp , +writes the string +.I msg +on the standard error file and exits. +.I Regerror +can be replaced to perform +special error processing. +If the user supplied +.I regerror +returns rather than exits, +.I regcomp +will return 0. +.PP +.I Rregexec +and +.I rregsub +are variants of +.I regexec +and +.I regsub +that use strings of +.B Runes +instead of strings of +.BR chars . +With these routines, the +.I rsp +and +.I rep +fields of the +.I match +array elements should be used. +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH "SEE ALSO" +.IR grep (1) +.SH DIAGNOSTICS +.I Regcomp +returns +.B 0 +for an illegal expression +or other failure. +.I Regexec +returns 0 +if +.I string +is not matched, and \-1 if it runs out of space for its internal lists. +.SH BUGS +There is no way to specify or match a NUL character; NULs terminate patterns and strings. diff --git a/mk/libregexp/regexp9.7 b/mk/libregexp/regexp9.7 @@ -0,0 +1,141 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH REGEXP9 7 +.SH NAME +regexp \- Plan 9 regular expression notation +.SH DESCRIPTION +This manual page describes the regular expression +syntax used by the Plan 9 regular expression library +.IR regexp9 (3). +It is the form used by +.IR egrep (1) +before +.I egrep +got complicated. +.PP +A +.I "regular expression" +specifies +a set of strings of characters. +A member of this set of strings is said to be +.I matched +by the regular expression. In many applications +a delimiter character, commonly +.LR / , +bounds a regular expression. +In the following specification for regular expressions +the word `character' means any character (rune) but newline. +.PP +The syntax for a regular expression +.B e0 +is +.IP +.EX +e3: literal | charclass | '.' | '^' | '$' | '(' e0 ')' + +e2: e3 + | e2 REP + +REP: '*' | '+' | '?' 
+ +e1: e2 + | e1 e2 + +e0: e1 + | e0 '|' e1 +.EE +.PP +A +.B literal +is any non-metacharacter, or a metacharacter +(one of +.BR .*+?[]()|\e^$ ), +or the delimiter +preceded by +.LR \e . +.PP +A +.B charclass +is a nonempty string +.I s +bracketed +.BI [ \|s\| ] +(or +.BI [^ s\| ]\fR); +it matches any character in (or not in) +.IR s . +A negated character class never +matches newline. +A substring +.IB a - b\f1, +with +.I a +and +.I b +in ascending +order, stands for the inclusive +range of +characters between +.I a +and +.IR b . +In +.IR s , +the metacharacters +.LR - , +.LR ] , +an initial +.LR ^ , +and the regular expression delimiter +must be preceded by a +.LR \e ; +other metacharacters +have no special meaning and +may appear unescaped. +.PP +A +.L . +matches any character. +.PP +A +.L ^ +matches the beginning of a line; +.L $ +matches the end of the line. +.PP +The +.B REP +operators match zero or more +.RB ( * ), +one or more +.RB ( + ), +zero or one +.RB ( ? ), +instances respectively of the preceding regular expression +.BR e2 . +.PP +A concatenated regular expression, +.BR "e1\|e2" , +matches a match to +.B e1 +followed by a match to +.BR e2 . +.PP +An alternative regular expression, +.BR "e0\||\|e1" , +matches either a match to +.B e0 +or a match to +.BR e1 . +.PP +A match to any part of a regular expression +extends as far as possible without preventing +a match to the remainder of the regular expression. 
+.SH "SEE ALSO" +.IR regexp9 (3) diff --git a/mk/libregexp/regexp9.h b/mk/libregexp/regexp9.h @@ -0,0 +1,96 @@ +#ifndef _REGEXP9_H_ +#define _REGEXP9_H_ 1 +#if defined(__cplusplus) +extern "C" { +#endif + +#ifdef AUTOLIB +AUTOLIB(regexp9) +#endif + +#include <utf.h> + +typedef struct Resub Resub; +typedef struct Reclass Reclass; +typedef struct Reinst Reinst; +typedef struct Reprog Reprog; + +/* + * Sub expression matches + */ +struct Resub{ + union + { + char *sp; + Rune *rsp; + }s; + union + { + char *ep; + Rune *rep; + }e; +}; + +/* + * character class, each pair of rune's defines a range + */ +struct Reclass{ + Rune *end; + Rune spans[64]; +}; + +/* + * Machine instructions + */ +struct Reinst{ + int type; + union { + Reclass *cp; /* class pointer */ + Rune r; /* character */ + int subid; /* sub-expression id for RBRA and LBRA */ + Reinst *right; /* right child of OR */ + }u1; + union { /* regexp relies on these two being in the same union */ + Reinst *left; /* left child of OR */ + Reinst *next; /* next instruction for CAT & LBRA */ + }u2; +}; + +/* + * Reprogram definition + */ +struct Reprog{ + Reinst *startinst; /* start pc */ + Reclass class[16]; /* .data */ + Reinst firstinst[5]; /* .text */ +}; + +extern Reprog *regcomp9(char*); +extern Reprog *regcomplit9(char*); +extern Reprog *regcompnl9(char*); +extern void regerror9(char*); +extern int regexec9(Reprog*, char*, Resub*, int); +extern void regsub9(char*, char*, int, Resub*, int); + +extern int rregexec9(Reprog*, Rune*, Resub*, int); +extern void rregsub9(Rune*, Rune*, int, Resub*, int); + +/* + * Darwin simply cannot handle having routines that + * override other library routines. 
+ */ +#ifndef NOPLAN9DEFINES +#define regcomp regcomp9 +#define regcomplit regcomplit9 +#define regcompnl regcompnl9 +#define regerror regerror9 +#define regexec regexec9 +#define regsub regsub9 +#define rregexec rregexec9 +#define rregsub rregsub9 +#endif + +#if defined(__cplusplus) +} +#endif +#endif diff --git a/mk/libregexp/regsub.c b/mk/libregexp/regsub.c @@ -0,0 +1,63 @@ +#include "lib9.h" +#include "regexp9.h" + +/* substitute into one string using the matches from the last regexec(): copies sp to dp, expanding \0-\9 and & from mp; writes at most dlen bytes including the terminating NUL (dlen must be at least 1); a nil mp, an index >= ms or an unset match expands to nothing */ +extern void +regsub(char *sp, /* source string */ + char *dp, /* destination string */ + int dlen, + Resub *mp, /* subexpression elements */ + int ms) /* number of elements pointed to by mp */ +{ + char *ssp, *ep; + int i; + + ep = dp+dlen-1; + while(*sp != '\0'){ + if(*sp == '\\'){ + switch(*++sp){ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + i = *sp-'0'; + if(mp!=0 && ms>i && mp[i].s.sp != 0) /* validate mp and the index before reading mp[i] */ + for(ssp = mp[i].s.sp; + ssp < mp[i].e.ep; + ssp++) + if(dp < ep) + *dp++ = *ssp; + break; + case '\\': + if(dp < ep) + *dp++ = '\\'; + break; + case '\0': + sp--; + break; + default: + if(dp < ep) + *dp++ = *sp; + break; + } + }else if(*sp == '&'){ + if(mp!=0 && ms>0 && mp[0].s.sp != 0) /* validate mp before reading mp[0] */ + if(mp[0].s.sp != 0) + for(ssp = mp[0].s.sp; + ssp < mp[0].e.ep; ssp++) + if(dp < ep) + *dp++ = *ssp; + }else{ + if(dp < ep) + *dp++ = *sp; + } + sp++; + } + *dp = '\0'; +} diff --git a/mk/libregexp/rregexec.c b/mk/libregexp/rregexec.c @@ -0,0 +1,212 @@ +#include "lib9.h" +#include "regexp9.h" +#include "regcomp.h" + +/* + * return 0 if no match + * >0 if a match + * <0 if we ran out of _relist space + */ +static int +rregexec1(Reprog *progp, /* program to run */ + Rune *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + Reljunk *j) +{ + int flag=0; + Reinst *inst; + Relist *tlp; + Rune *s; + int i, checkstart; + Rune r, *rp, *ep; + Relist* tl; /* This 
list, next list */ + Relist* nl; + Relist* tle; /* ends of this and next list */ + Relist* nle; + int match; + Rune *p; + + match = 0; + checkstart = j->startchar; + if(mp) + for(i=0; i<ms; i++) { + mp[i].s.rsp = 0; + mp[i].e.rep = 0; + } + j->relist[0][0].inst = 0; + j->relist[1][0].inst = 0; + + /* Execute machine once for each character, including terminal NUL */ + s = j->rstarts; + do{ + + /* fast check for first char */ + if(checkstart) { + switch(j->starttype) { + case RUNE: + p = runestrchr(s, j->startchar); + if(p == 0 || p == j->reol) + return match; + s = p; + break; + case BOL: + if(s == bol) + break; + p = runestrchr(s, '\n'); + if(p == 0 || s == j->reol) + return match; + s = p+1; + break; + } + } + + r = *s; + + /* switch run lists */ + tl = j->relist[flag]; + tle = j->reliste[flag]; + nl = j->relist[flag^=1]; + nle = j->reliste[flag]; + nl->inst = 0; + + /* Add first instruction to current list */ + _rrenewemptythread(tl, progp->startinst, ms, s); + + /* Execute machine until current list is empty */ + for(tlp=tl; tlp->inst; tlp++){ + for(inst=tlp->inst; ; inst = inst->u2.next){ + switch(inst->type){ + case RUNE: /* regular character */ + if(inst->u1.r == r) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + case LBRA: + tlp->se.m[inst->u1.subid].s.rsp = s; + continue; + case RBRA: + tlp->se.m[inst->u1.subid].e.rep = s; + continue; + case ANY: + if(r != '\n') + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + case ANYNL: + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + case BOL: + if(s == bol || *(s-1) == '\n') + continue; + break; + case EOL: + if(s == j->reol || r == 0 || r == '\n') + continue; + break; + case CCLASS: + ep = inst->u1.cp->end; + for(rp = inst->u1.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]){ + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + } + break; + case NCCLASS: + ep = inst->u1.cp->end; + 
for(rp = inst->u1.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]) + break; + if(rp == ep) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + case OR: + /* evaluate right choice later */ + if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle) + return -1; + /* efficiency: advance and re-evaluate */ + continue; + case END: /* Match! */ + match = 1; + tlp->se.m[0].e.rep = s; + if(mp != 0) + _renewmatch(mp, ms, &tlp->se); + break; + } + break; + } + } + if(s == j->reol) + break; + checkstart = j->startchar && nl->inst==0; + s++; + }while(r); + return match; +} + +static int +rregexec2(Reprog *progp, /* program to run */ + Rune *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + Reljunk *j +) +{ + Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE]; + + /* mark space */ + j->relist[0] = relist0; + j->relist[1] = relist1; + j->reliste[0] = relist0 + nelem(relist0) - 2; + j->reliste[1] = relist1 + nelem(relist1) - 2; + + return rregexec1(progp, bol, mp, ms, j); +} + +extern int +rregexec(Reprog *progp, /* program to run */ + Rune *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms) /* number of elements at mp */ +{ + Reljunk j; + Relist relist0[LISTSIZE], relist1[LISTSIZE]; + int rv; + + /* + * use user-specified starting/ending location if specified + */ + j.rstarts = bol; + j.reol = 0; + if(mp && ms>0){ + if(mp->s.sp) + j.rstarts = mp->s.rsp; + if(mp->e.ep) + j.reol = mp->e.rep; + } + j.starttype = 0; + j.startchar = 0; + if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) { + j.starttype = RUNE; + j.startchar = progp->startinst->u1.r; + } + if(progp->startinst->type == BOL) + j.starttype = BOL; + + /* mark space */ + j.relist[0] = relist0; + j.relist[1] = relist1; + j.reliste[0] = relist0 + nelem(relist0) - 2; + j.reliste[1] = relist1 + nelem(relist1) - 2; + + rv = rregexec1(progp, bol, mp, ms, &j); + 
if(rv >= 0) + return rv; + rv = rregexec2(progp, bol, mp, ms, &j); + if(rv >= 0) + return rv; + return -1; +} diff --git a/mk/libregexp/rregsub.c b/mk/libregexp/rregsub.c @@ -0,0 +1,63 @@ +#include "lib9.h" +#include "regexp9.h" + +/* substitute into one string using the matches from the last regexec(): Rune variant of regsub; dlen is the size of dp in bytes (at least sizeof(Rune)), output is always 0-terminated; a nil mp, an index >= ms or an unset match expands to nothing */ +extern void +rregsub(Rune *sp, /* source string */ + Rune *dp, /* destination string */ + int dlen, + Resub *mp, /* subexpression elements */ + int ms) /* number of elements pointed to by mp */ +{ + Rune *ssp, *ep; + int i; + + ep = dp+(dlen/sizeof(Rune))-1; + while(*sp != '\0'){ + if(*sp == '\\'){ + switch(*++sp){ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + i = *sp-'0'; + if(mp!=0 && ms>i && mp[i].s.rsp != 0) /* validate mp and the index before reading mp[i] */ + for(ssp = mp[i].s.rsp; + ssp < mp[i].e.rep; + ssp++) + if(dp < ep) + *dp++ = *ssp; + break; + case '\\': + if(dp < ep) + *dp++ = '\\'; + break; + case '\0': + sp--; + break; + default: + if(dp < ep) + *dp++ = *sp; + break; + } + }else if(*sp == '&'){ + if(mp!=0 && ms>0 && mp[0].s.rsp != 0) /* validate mp before reading mp[0] */ + if(mp[0].s.rsp != 0) + for(ssp = mp[0].s.rsp; + ssp < mp[0].e.rep; ssp++) + if(dp < ep) + *dp++ = *ssp; + }else{ + if(dp < ep) + *dp++ = *sp; + } + sp++; + } + *dp = '\0'; +} diff --git a/mk/libregexp/test.c b/mk/libregexp/test.c @@ -0,0 +1,46 @@ +#include "lib9.h" +#include <regexp9.h> + +struct x +{ + char *re; + char *s; + Reprog *p; +}; + +struct x t[] = { + { "^[^!@]+$", "/bin/upas/aliasmail '&'", 0 }, + { "^local!(.*)$", "/mail/box/\\1/mbox", 0 }, + { "^plan9!(.*)$", "\\1", 0 }, + { "^helix!(.*)$", "\\1", 0 }, + { "^([^!]+)@([^!@]+)$", "\\2!\\1", 0 }, + { "^(uk\\.[^!]*)(!.*)$", "/bin/upas/uk2uk '\\1' '\\2'", 0 }, + { "^[^!]*\\.[^!]*!.*$", "inet!&", 0 }, + { "^\xE2\x98\xBA$", "smiley", 0 }, + { "^(coma|research|pipe|pyxis|inet|hunny|gauss)!(.*)$", "/mail/lib/qmail '\\s' 'net!\\1' '\\2'", 0 }, + { "^.*$", "/mail/lib/qmail '\\s' 'net!research' '&'", 0 }, + { 0, 0, 0 }, +}; + +main(int ac, 
char **av) +{ + Resub rs[10]; + char dst[128]; + int n; + struct x *tp; + + for(tp = t; tp->re; tp++) + tp->p = regcomp(tp->re); + + + for(tp = t; tp->re; tp++){ + print("%s VIA %s", av[1], tp->re); + memset(rs, 0, sizeof rs); + if(regexec(tp->p, av[1], rs, 10)){ + regsub(tp->s, dst, sizeof dst, rs, 10); + print(" sub %s -> %s", tp->s, dst); + } + print("\n"); + } + exit(0); +} diff --git a/mk/libregexp/test2.c b/mk/libregexp/test2.c @@ -0,0 +1,22 @@ +#include "lib9.h" +#include <regexp9.h> + + +main(int ac, char **av) +{ + Resub rs[10]; + Reprog *p; + char *s; + int i; + + p = regcomp("[^a-z]"); + s = "\n"; + memset(rs, 0, sizeof rs); /* regexec reads rs[0] on entry as start/end hints */ + if(regexec(p, s, rs, 10)) + print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep); + s = "0"; + memset(rs, 0, sizeof rs); /* drop pointers left over from the previous string */ + if(regexec(p, s, rs, 10)) + print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep); + exit(0); +} diff --git a/mk/libutf/NOTICE b/mk/libutf/NOTICE @@ -0,0 +1,25 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. +*/ + +This is a Unix port of the Plan 9 formatted I/O package. + +Please send comments about the packaging +to Russ Cox <rsc@swtch.com>. 
+ + +---- + +This software is also made available under the Lucent Public License +version 1.02; see http://plan9.bell-labs.com/plan9dist/license.html + diff --git a/mk/libutf/README b/mk/libutf/README @@ -0,0 +1,5 @@ +This software was packaged for Unix by Russ Cox. +Please send comments to rsc@swtch.com. + +http://swtch.com/plan9port/unix + diff --git a/mk/libutf/depsinc.mk b/mk/libutf/depsinc.mk @@ -0,0 +1,2 @@ +DEPS_CFLAGS = $DEPS_CFLAGS -I$libutf_DEPDIR +DEPS_LDFLAGS = $DEPS_LDFLAGS -L$libutf_DEPDIR -lutf diff --git a/mk/libutf/isalpharune.3 b/mk/libutf/isalpharune.3 @@ -0,0 +1,57 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH ISALPHARUNE 3 +.SH NAME +isalpharune, islowerrune, isspacerune, istitlerune, isupperrune, tolowerrune, totitlerune, toupperrune \- Unicode character classes and cases +.SH SYNOPSIS +.B #include <utf.h> +.PP +.B +int isalpharune(Rune c) +.PP +.B +int islowerrune(Rune c) +.PP +.B +int isspacerune(Rune c) +.PP +.B +int istitlerune(Rune c) +.PP +.B +int isupperrune(Rune c) +.PP +.B +Rune tolowerrune(Rune c) +.PP +.B +Rune totitlerune(Rune c) +.PP +.B +Rune toupperrune(Rune c) +.SH DESCRIPTION +These routines examine and operate on Unicode characters, +in particular a subset of their properties as defined in the Unicode standard. +Unicode defines some characters as alphabetic and specifies three cases: +upper, lower, and title. +Analogously to +.IR isalpha (3) +for +.SM ASCII\c +, +these routines +test types and modify cases for Unicode characters. +The names are self-explanatory. +.PP +The case-conversion routines return the character unchanged if it has no case. +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH "SEE ALSO" +.IR isalpha (3) , +.IR "The Unicode Standard" . 
diff --git a/mk/libutf/mkfile b/mk/libutf/mkfile @@ -0,0 +1,27 @@ +LIB = libutf.a +LOBJ = \ + rune.o\ + runestrcat.o\ + runestrchr.o\ + runestrcmp.o\ + runestrcpy.o\ + runestrdup.o\ + runestrlen.o\ + runestrecpy.o\ + runestrncat.o\ + runestrncmp.o\ + runestrncpy.o\ + runestrrchr.o\ + runestrstr.o\ + runetype.o\ + utfecpy.o\ + utflen.o\ + utfnlen.o\ + utfrrune.o\ + utfrune.o\ + utfutf.o\ + +LOCAL_CFLAGS = -I"$PREFIX"/include + +<$mkbuild/mk.default + diff --git a/mk/libutf/plan9.h b/mk/libutf/plan9.h @@ -0,0 +1,29 @@ +/* + * compiler directive on Plan 9 + */ +#ifndef USED +#define USED(x) if(x);else +#endif + +/* + * easiest way to make sure these are defined + */ +#define uchar _utfuchar +#define ushort _utfushort +#define uint _utfuint +#define ulong _utfulong +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + +/* + * nil cannot be ((void*)0) on ANSI C, + * because it is used for function pointers + */ +#undef nil +#define nil 0 + +#undef nelem +#define nelem(x) (sizeof (x)/sizeof (x)[0]) + diff --git a/mk/libutf/rune.3 b/mk/libutf/rune.3 @@ -0,0 +1,194 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH RUNE 3 +.SH NAME +runetochar, chartorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf \- rune/UTF conversion +.SH SYNOPSIS +.ta \w'\fLchar*xx'u +.B #include <utf.h> +.PP +.B +int runetochar(char *s, Rune *r) +.PP +.B +int chartorune(Rune *r, char *s) +.PP +.B +int runelen(long r) +.PP +.B +int runenlen(Rune *r, int n) +.PP +.B +int fullrune(char *s, int n) +.PP +.B +char* utfecpy(char *s1, char *es1, char *s2) +.PP +.B +int utflen(char *s) +.PP +.B +int utfnlen(char *s, long n) +.PP +.B +char* utfrune(char *s, long c) +.PP +.B +char* utfrrune(char *s, long c) +.PP +.B +char* utfutf(char *s1, char *s2) +.SH DESCRIPTION +These routines convert to and from a +.SM UTF +byte stream and runes. 
+.PP +.I Runetochar +copies one rune at +.I r +to at most +.B UTFmax +bytes starting at +.I s +and returns the number of bytes copied. +.BR UTFmax , +defined as +.B 3 +in +.BR <libc.h> , +is the maximum number of bytes required to represent a rune. +.PP +.I Chartorune +copies at most +.B UTFmax +bytes starting at +.I s +to one rune at +.I r +and returns the number of bytes copied. +If the input is not exactly in +.SM UTF +format, +.I chartorune +will convert to 0x80 and return 1. +.PP +.I Runelen +returns the number of bytes +required to convert +.I r +into +.SM UTF. +.PP +.I Runenlen +returns the number of bytes +required to convert the +.I n +runes pointed to by +.I r +into +.SM UTF. +.PP +.I Fullrune +returns 1 if the string +.I s +of length +.I n +is long enough to be decoded by +.I chartorune +and 0 otherwise. +This does not guarantee that the string +contains a legal +.SM UTF +encoding. +This routine is used by programs that +obtain input a byte at +a time and need to know when a full rune +has arrived. +.PP +The following routines are analogous to the +corresponding string routines with +.B utf +substituted for +.B str +and +.B rune +substituted for +.BR chr . +.PP +.I Utfecpy +copies UTF sequences until a null sequence has been copied, but writes no +sequences beyond +.IR es1 . +If any sequences are copied, +.I s1 +is terminated by a null sequence, and a pointer to that sequence is returned. +Otherwise, the original +.I s1 +is returned. +.PP +.I Utflen +returns the number of runes that +are represented by the +.SM UTF +string +.IR s . +.PP +.I Utfnlen +returns the number of complete runes that +are represented by the first +.I n +bytes of +.SM UTF +string +.IR s . +If the last few bytes of the string contain an incompletely coded rune, +.I utfnlen +will not count them; in this way, it differs from +.IR utflen , +which includes every byte of the string. 
+.PP +.I Utfrune +.RI ( utfrrune ) +returns a pointer to the first (last) +occurrence of rune +.I c +in the +.SM UTF +string +.IR s , +or 0 if +.I c +does not occur in the string. +The NUL byte terminating a string is considered to +be part of the string +.IR s . +.PP +.I Utfutf +returns a pointer to the first occurrence of +the +.SM UTF +string +.I s2 +as a +.SM UTF +substring of +.IR s1 , +or 0 if there is none. +If +.I s2 +is the null string, +.I utfutf +returns +.IR s1 . +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH SEE ALSO +.IR utf (7), +.IR tcs (1) diff --git a/mk/libutf/rune.c b/mk/libutf/rune.c @@ -0,0 +1,217 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +enum +{ + Bit1 = 7, + Bitx = 6, + Bit2 = 5, + Bit3 = 4, + Bit4 = 3, + Bit5 = 2, + + T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ + Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ + T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ + T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ + T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ + T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ + + Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ + Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */ + Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */ + Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */ + + Maskx = (1<<Bitx)-1, /* 0011 1111 */ + Testx = Maskx ^ 0xFF, /* 1100 0000 */ + + Bad = Runeerror +}; + +int +chartorune(Rune *rune, char *str) +{ + int c, c1, c2, c3; + long l; + + /* + * one character sequence + * 00000-0007F => T1 + */ + c = *(uchar*)str; + if(c < Tx) { + *rune = c; + return 1; + } + + /* + * two character sequence + * 0080-07FF => T2 Tx + */ + c1 = *(uchar*)(str+1) ^ Tx; + if(c1 & Testx) + goto bad; + if(c < T3) { + if(c < T2) + goto bad; + l = ((c << Bitx) | c1) & Rune2; + if(l <= Rune1) + goto bad; + *rune = l; + return 2; + } + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + c2 = *(uchar*)(str+2) ^ Tx; + if(c2 & Testx) + goto bad; + if(c < T4) { + l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; + if(l <= Rune2) + goto bad; + *rune = l; + return 3; + } + + /* + * four character sequence + * 10000-10FFFF => T4 Tx Tx Tx + */ + if(UTFmax >= 4) { + c3 = *(uchar*)(str+3) ^ Tx; + if(c3 & Testx) + goto bad; + if(c < T5) { + l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; + if(l <= Rune3) + goto bad; + if(l > Runemax) + goto bad; + *rune = l; + return 4; + } + } + + /* + * bad decoding + */ +bad: + *rune = Bad; + return 1; +} + +int +runetochar(char *str, Rune *rune) +{ + long c; + + /* 
+ * one character sequence + * 00000-0007F => 00-7F + */ + c = *rune; + if(c <= Rune1) { + str[0] = c; + return 1; + } + + /* + * two character sequence + * 00080-007FF => T2 Tx + */ + if(c <= Rune2) { + str[0] = T2 | (c >> 1*Bitx); + str[1] = Tx | (c & Maskx); + return 2; + } + + /* + * three character sequence + * 00800-0FFFF => T3 Tx Tx + */ + if(c > Runemax) + c = Runeerror; + if(c <= Rune3) { + str[0] = T3 | (c >> 2*Bitx); + str[1] = Tx | ((c >> 1*Bitx) & Maskx); + str[2] = Tx | (c & Maskx); + return 3; + } + + /* + * four character sequence + * 010000-1FFFFF => T4 Tx Tx Tx + */ + str[0] = T4 | (c >> 3*Bitx); + str[1] = Tx | ((c >> 2*Bitx) & Maskx); + str[2] = Tx | ((c >> 1*Bitx) & Maskx); + str[3] = Tx | (c & Maskx); + return 4; +} + +int +runelen(long c) +{ + Rune rune; + char str[10]; + + rune = c; + return runetochar(str, &rune); +} + +int +runenlen(Rune *r, int nrune) +{ + int nb, c; + + nb = 0; + while(nrune--) { + c = *r++; + if(c <= Rune1) + nb++; + else + if(c <= Rune2) + nb += 2; + else + if(c <= Rune3 || c > Runemax) + nb += 3; + else + nb += 4; + } + return nb; +} + +int +fullrune(char *str, int n) +{ + int c; + + if(n <= 0) + return 0; + c = *(uchar*)str; + if(c < Tx) + return 1; + if(c < T3) + return n >= 2; + if(UTFmax == 3 || c < T4) + return n >= 3; + return n >= 4; +} diff --git a/mk/libutf/runestrcat.3 b/mk/libutf/runestrcat.3 @@ -0,0 +1,74 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. 
+.TH RUNESTRCAT 3 +.SH NAME +runestrcat, +runestrncat, +runestrcmp, +runestrncmp, +runestrcpy, +runestrncpy, +runestrecpy, +runestrlen, +runestrchr, +runestrrchr, +runestrdup, +runestrstr \- rune string operations +.SH SYNOPSIS +.B #include <u.h> +.br +.B #include <libc.h> +.PP +.ta \w'\fLRune* \fP'u +.B +Rune* runestrcat(Rune *s1, Rune *s2) +.PP +.B +Rune* runestrncat(Rune *s1, Rune *s2, long n) +.PP +.B +int runestrcmp(Rune *s1, Rune *s2) +.PP +.B +int runestrncmp(Rune *s1, Rune *s2, long n) +.PP +.B +Rune* runestrcpy(Rune *s1, Rune *s2) +.PP +.B +Rune* runestrncpy(Rune *s1, Rune *s2, long n) +.PP +.B +Rune* runestrecpy(Rune *s1, Rune *es1, Rune *s2) +.PP +.B +long runestrlen(Rune *s) +.PP +.B +Rune* runestrchr(Rune *s, Rune c) +.PP +.B +Rune* runestrrchr(Rune *s, Rune c) +.PP +.B +Rune* runestrdup(Rune *s) +.PP +.B +Rune* runestrstr(Rune *s1, Rune *s2) +.SH DESCRIPTION +These functions are rune string analogues of +the corresponding functions in +.IR strcat (3). +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH SEE ALSO +.IR rune (3), +.IR strcat (3) +.SH BUGS +The outcome of overlapping moves varies among implementations. diff --git a/mk/libutf/runestrcat.c b/mk/libutf/runestrcat.c @@ -0,0 +1,25 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +Rune* +runestrcat(Rune *s1, Rune *s2) +{ + + runestrcpy(runestrchr(s1, 0), s2); + return s1; +} diff --git a/mk/libutf/runestrchr.c b/mk/libutf/runestrchr.c @@ -0,0 +1,35 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +Rune* +runestrchr(Rune *s, Rune c) +{ + Rune c0 = c; + Rune c1; + + if(c == 0) { + while(*s++) + ; + return s-1; + } + + while(c1 = *s++) + if(c1 == c0) + return s-1; + return 0; +} diff --git a/mk/libutf/runestrcmp.c b/mk/libutf/runestrcmp.c @@ -0,0 +1,35 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. 
IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +int +runestrcmp(Rune *s1, Rune *s2) +{ + Rune c1, c2; + + for(;;) { + c1 = *s1++; + c2 = *s2++; + if(c1 != c2) { + if(c1 > c2) + return 1; + return -1; + } + if(c1 == 0) + return 0; + } +} diff --git a/mk/libutf/runestrcpy.c b/mk/libutf/runestrcpy.c @@ -0,0 +1,28 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +Rune* +runestrcpy(Rune *s1, Rune *s2) +{ + Rune *os1; + + os1 = s1; + while(*s1++ = *s2++) + ; + return os1; +} diff --git a/mk/libutf/runestrdup.c b/mk/libutf/runestrdup.c @@ -0,0 +1,30 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> +#include "plan9.h" +#include "utf.h" + +Rune* +runestrdup(Rune *s) +{ + Rune *ns; + + ns = malloc(sizeof(Rune)*(runestrlen(s) + 1)); + if(ns == 0) + return 0; + + return runestrcpy(ns, s); +} diff --git a/mk/libutf/runestrecpy.c b/mk/libutf/runestrecpy.c @@ -0,0 +1,32 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +Rune* +runestrecpy(Rune *s1, Rune *es1, Rune *s2) +{ + if(s1 >= es1) + return s1; + + while(*s1++ = *s2++){ + if(s1 == es1){ + *--s1 = '\0'; + break; + } + } + return s1; +} diff --git a/mk/libutf/runestrlen.c b/mk/libutf/runestrlen.c @@ -0,0 +1,24 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +long +runestrlen(Rune *s) +{ + + return runestrchr(s, 0) - s; +} diff --git a/mk/libutf/runestrncat.c b/mk/libutf/runestrncat.c @@ -0,0 +1,32 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. 
IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +Rune* +runestrncat(Rune *s1, Rune *s2, long n) +{ + Rune *os1; + + os1 = s1; + s1 = runestrchr(s1, 0); + while(*s1++ = *s2++) + if(--n < 0) { + s1[-1] = 0; + break; + } + return os1; +} diff --git a/mk/libutf/runestrncmp.c b/mk/libutf/runestrncmp.c @@ -0,0 +1,37 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +int +runestrncmp(Rune *s1, Rune *s2, long n) +{ + Rune c1, c2; + + while(n > 0) { + c1 = *s1++; + c2 = *s2++; + n--; + if(c1 != c2) { + if(c1 > c2) + return 1; + return -1; + } + if(c1 == 0) + break; + } + return 0; +} diff --git a/mk/libutf/runestrncpy.c b/mk/libutf/runestrncpy.c @@ -0,0 +1,33 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +Rune* +runestrncpy(Rune *s1, Rune *s2, long n) +{ + int i; + Rune *os1; + + os1 = s1; + for(i = 0; i < n; i++) + if((*s1++ = *s2++) == 0) { + while(++i < n) + *s1++ = 0; + return os1; + } + return os1; +} diff --git a/mk/libutf/runestrrchr.c b/mk/libutf/runestrrchr.c @@ -0,0 +1,30 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +Rune* +runestrrchr(Rune *s, Rune c) +{ + Rune *r; + + if(c == 0) + return runestrchr(s, 0); + r = 0; + while(s = runestrchr(s, c)) + r = s++; + return r; +} diff --git a/mk/libutf/runestrstr.c b/mk/libutf/runestrstr.c @@ -0,0 +1,44 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +/* + * Return pointer to first occurrence of s2 in s1, + * 0 if none + */ +Rune* +runestrstr(Rune *s1, Rune *s2) +{ + Rune *p, *pa, *pb; + int c0, c; + + c0 = *s2; + if(c0 == 0) + return s1; + s2++; + for(p=runestrchr(s1, c0); p; p=runestrchr(p+1, c0)) { + pa = p; + for(pb=s2;; pb++) { + c = *pb; + if(c == 0) + return p; + if(c != *++pa) + break; + } + } + return 0; +} diff --git a/mk/libutf/runetype.c b/mk/libutf/runetype.c @@ -0,0 +1,1151 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +/* + * alpha ranges - + * only covers ranges not in lower||upper + */ +static +Rune __alpha2[] = +{ + 0x00d8, 0x00f6, /* Ø - ö */ + 0x00f8, 0x01f5, /* ø - ǵ */ + 0x0250, 0x02a8, /* ɐ - ʨ */ + 0x038e, 0x03a1, /* Ύ - Ρ */ + 0x03a3, 0x03ce, /* Σ - ώ */ + 0x03d0, 0x03d6, /* ϐ - ϖ */ + 0x03e2, 0x03f3, /* Ϣ - ϳ */ + 0x0490, 0x04c4, /* Ґ - ӄ */ + 0x0561, 0x0587, /* ա - և */ + 0x05d0, 0x05ea, /* א - ת */ + 0x05f0, 0x05f2, /* װ - ײ */ + 0x0621, 0x063a, /* ء - غ */ + 0x0640, 0x064a, /* ـ - ي */ + 0x0671, 0x06b7, /* ٱ - ڷ */ + 0x06ba, 0x06be, /* ں - ھ */ + 0x06c0, 0x06ce, /* ۀ - ێ */ + 0x06d0, 0x06d3, /* ې - ۓ */ + 0x0905, 0x0939, /* अ - ह */ + 0x0958, 0x0961, /* क़ - ॡ */ + 0x0985, 0x098c, /* অ - ঌ */ + 0x098f, 0x0990, /* এ - ঐ */ + 0x0993, 0x09a8, /* ও - ন */ + 0x09aa, 0x09b0, /* প - র */ + 0x09b6, 0x09b9, /* শ - হ */ + 0x09dc, 0x09dd, /* ড় - ঢ় */ + 0x09df, 0x09e1, /* য় - ৡ */ + 0x09f0, 0x09f1, /* ৰ - ৱ */ + 0x0a05, 0x0a0a, /* ਅ - ਊ */ + 0x0a0f, 0x0a10, /* ਏ - ਐ */ + 0x0a13, 0x0a28, /* ਓ - ਨ */ + 0x0a2a, 0x0a30, /* ਪ - ਰ */ + 0x0a32, 0x0a33, /* ਲ - ਲ਼ */ + 0x0a35, 0x0a36, /* ਵ - ਸ਼ */ + 0x0a38, 0x0a39, /* ਸ - ਹ */ + 0x0a59, 0x0a5c, /* ਖ਼ - ੜ */ + 0x0a85, 0x0a8b, /* અ - ઋ */ + 0x0a8f, 0x0a91, /* એ - ઑ */ + 0x0a93, 0x0aa8, /* ઓ - ન */ + 0x0aaa, 0x0ab0, /* પ - ર */ + 
0x0ab2, 0x0ab3, /* લ - ળ */ + 0x0ab5, 0x0ab9, /* વ - હ */ + 0x0b05, 0x0b0c, /* ଅ - ଌ */ + 0x0b0f, 0x0b10, /* ଏ - ଐ */ + 0x0b13, 0x0b28, /* ଓ - ନ */ + 0x0b2a, 0x0b30, /* ପ - ର */ + 0x0b32, 0x0b33, /* ଲ - ଳ */ + 0x0b36, 0x0b39, /* ଶ - ହ */ + 0x0b5c, 0x0b5d, /* ଡ଼ - ଢ଼ */ + 0x0b5f, 0x0b61, /* ୟ - ୡ */ + 0x0b85, 0x0b8a, /* அ - ஊ */ + 0x0b8e, 0x0b90, /* எ - ஐ */ + 0x0b92, 0x0b95, /* ஒ - க */ + 0x0b99, 0x0b9a, /* ங - ச */ + 0x0b9e, 0x0b9f, /* ஞ - ட */ + 0x0ba3, 0x0ba4, /* ண - த */ + 0x0ba8, 0x0baa, /* ந - ப */ + 0x0bae, 0x0bb5, /* ம - வ */ + 0x0bb7, 0x0bb9, /* ஷ - ஹ */ + 0x0c05, 0x0c0c, /* అ - ఌ */ + 0x0c0e, 0x0c10, /* ఎ - ఐ */ + 0x0c12, 0x0c28, /* ఒ - న */ + 0x0c2a, 0x0c33, /* ప - ళ */ + 0x0c35, 0x0c39, /* వ - హ */ + 0x0c60, 0x0c61, /* ౠ - ౡ */ + 0x0c85, 0x0c8c, /* ಅ - ಌ */ + 0x0c8e, 0x0c90, /* ಎ - ಐ */ + 0x0c92, 0x0ca8, /* ಒ - ನ */ + 0x0caa, 0x0cb3, /* ಪ - ಳ */ + 0x0cb5, 0x0cb9, /* ವ - ಹ */ + 0x0ce0, 0x0ce1, /* ೠ - ೡ */ + 0x0d05, 0x0d0c, /* അ - ഌ */ + 0x0d0e, 0x0d10, /* എ - ഐ */ + 0x0d12, 0x0d28, /* ഒ - ന */ + 0x0d2a, 0x0d39, /* പ - ഹ */ + 0x0d60, 0x0d61, /* ൠ - ൡ */ + 0x0e01, 0x0e30, /* ก - ะ */ + 0x0e32, 0x0e33, /* า - ำ */ + 0x0e40, 0x0e46, /* เ - ๆ */ + 0x0e5a, 0x0e5b, /* ๚ - ๛ */ + 0x0e81, 0x0e82, /* ກ - ຂ */ + 0x0e87, 0x0e88, /* ງ - ຈ */ + 0x0e94, 0x0e97, /* ດ - ທ */ + 0x0e99, 0x0e9f, /* ນ - ຟ */ + 0x0ea1, 0x0ea3, /* ມ - ຣ */ + 0x0eaa, 0x0eab, /* ສ - ຫ */ + 0x0ead, 0x0eae, /* ອ - ຮ */ + 0x0eb2, 0x0eb3, /* າ - ຳ */ + 0x0ec0, 0x0ec4, /* ເ - ໄ */ + 0x0edc, 0x0edd, /* ໜ - ໝ */ + 0x0f18, 0x0f19, /* ༘ - ༙ */ + 0x0f40, 0x0f47, /* ཀ - ཇ */ + 0x0f49, 0x0f69, /* ཉ - ཀྵ */ + 0x10d0, 0x10f6, /* ა - ჶ */ + 0x1100, 0x1159, /* ᄀ - ᅙ */ + 0x115f, 0x11a2, /* ᅟ - ᆢ */ + 0x11a8, 0x11f9, /* ᆨ - ᇹ */ + 0x1e00, 0x1e9b, /* Ḁ - ẛ */ + 0x1f50, 0x1f57, /* ὐ - ὗ */ + 0x1f80, 0x1fb4, /* ᾀ - ᾴ */ + 0x1fb6, 0x1fbc, /* ᾶ - ᾼ */ + 0x1fc2, 0x1fc4, /* ῂ - ῄ */ + 0x1fc6, 0x1fcc, /* ῆ - ῌ */ + 0x1fd0, 0x1fd3, /* ῐ - ΐ */ + 0x1fd6, 0x1fdb, /* ῖ - Ί */ + 0x1fe0, 0x1fec, /* ῠ - Ῥ */ + 0x1ff2, 0x1ff4, 
/* ῲ - ῴ */ + 0x1ff6, 0x1ffc, /* ῶ - ῼ */ + 0x210a, 0x2113, /* ℊ - ℓ */ + 0x2115, 0x211d, /* ℕ - ℝ */ + 0x2120, 0x2122, /* ℠ - ™ */ + 0x212a, 0x2131, /* K - ℱ */ + 0x2133, 0x2138, /* ℳ - ℸ */ + 0x3041, 0x3094, /* ぁ - ゔ */ + 0x30a1, 0x30fa, /* ァ - ヺ */ + 0x3105, 0x312c, /* ㄅ - ㄬ */ + 0x3131, 0x318e, /* ㄱ - ㆎ */ + 0x3192, 0x319f, /* ㆒ - ㆟ */ + 0x3260, 0x327b, /* ㉠ - ㉻ */ + 0x328a, 0x32b0, /* ㊊ - ㊰ */ + 0x32d0, 0x32fe, /* ㋐ - ㋾ */ + 0x3300, 0x3357, /* ㌀ - ㍗ */ + 0x3371, 0x3376, /* ㍱ - ㍶ */ + 0x337b, 0x3394, /* ㍻ - ㎔ */ + 0x3399, 0x339e, /* ㎙ - ㎞ */ + 0x33a9, 0x33ad, /* ㎩ - ㎭ */ + 0x33b0, 0x33c1, /* ㎰ - ㏁ */ + 0x33c3, 0x33c5, /* ㏃ - ㏅ */ + 0x33c7, 0x33d7, /* ㏇ - ㏗ */ + 0x33d9, 0x33dd, /* ㏙ - ㏝ */ + 0x4e00, 0x9fff, /* 一 - 鿿 */ + 0xac00, 0xd7a3, /* 가 - 힣 */ + 0xf900, 0xfb06, /* 豈 - st */ + 0xfb13, 0xfb17, /* ﬓ - ﬗ */ + 0xfb1f, 0xfb28, /* ײַ - ﬨ */ + 0xfb2a, 0xfb36, /* שׁ - זּ */ + 0xfb38, 0xfb3c, /* טּ - לּ */ + 0xfb40, 0xfb41, /* נּ - סּ */ + 0xfb43, 0xfb44, /* ףּ - פּ */ + 0xfb46, 0xfbb1, /* צּ - ﮱ */ + 0xfbd3, 0xfd3d, /* ﯓ - ﴽ */ + 0xfd50, 0xfd8f, /* ﵐ - ﶏ */ + 0xfd92, 0xfdc7, /* ﶒ - ﷇ */ + 0xfdf0, 0xfdf9, /* ﷰ - ﷹ */ + 0xfe70, 0xfe72, /* ﹰ - ﹲ */ + 0xfe76, 0xfefc, /* ﹶ - ﻼ */ + 0xff66, 0xff6f, /* ヲ - ッ */ + 0xff71, 0xff9d, /* ア - ン */ + 0xffa0, 0xffbe, /* ᅠ - ᄒ */ + 0xffc2, 0xffc7, /* ᅡ - ᅦ */ + 0xffca, 0xffcf, /* ᅧ - ᅬ */ + 0xffd2, 0xffd7, /* ᅭ - ᅲ */ + 0xffda, 0xffdc, /* ᅳ - ᅵ */ +}; + +/* + * alpha singlets - + * only covers ranges not in lower||upper + */ +static +Rune __alpha1[] = +{ + 0x00aa, /* ª */ + 0x00b5, /* µ */ + 0x00ba, /* º */ + 0x03da, /* Ϛ */ + 0x03dc, /* Ϝ */ + 0x03de, /* Ϟ */ + 0x03e0, /* Ϡ */ + 0x06d5, /* ە */ + 0x09b2, /* ল */ + 0x0a5e, /* ਫ਼ */ + 0x0a8d, /* ઍ */ + 0x0ae0, /* ૠ */ + 0x0b9c, /* ஜ */ + 0x0cde, /* ೞ */ + 0x0e4f, /* ๏ */ + 0x0e84, /* ຄ */ + 0x0e8a, /* ຊ */ + 0x0e8d, /* ຍ */ + 0x0ea5, /* ລ */ + 0x0ea7, /* ວ */ + 0x0eb0, /* ະ */ + 0x0ebd, /* ຽ */ + 0x1fbe, /* ι */ + 0x207f, /* ⁿ */ + 0x20a8, /* ₨ */ + 0x2102, /* ℂ */ + 0x2107, /* ℇ */ 
+ 0x2124, /* ℤ */ + 0x2126, /* Ω */ + 0x2128, /* ℨ */ + 0xfb3e, /* מּ */ + 0xfe74, /* ﹴ */ +}; + +/* + * space ranges + */ +static +Rune __space2[] = +{ + 0x0009, 0x000a, /* tab and newline */ + 0x0020, 0x0020, /* space */ + 0x00a0, 0x00a0, /*   */ + 0x2000, 0x200b, /*   - ​ */ + 0x2028, 0x2029, /* 
 - 
 */ + 0x3000, 0x3000, /*   */ + 0xfeff, 0xfeff, /*  */ +}; + +/* + * lower case ranges + * 3rd col is conversion excess 500 + */ +static +Rune __toupper2[] = +{ + 0x0061, 0x007a, 468, /* a-z A-Z */ + 0x00e0, 0x00f6, 468, /* à-ö À-Ö */ + 0x00f8, 0x00fe, 468, /* ø-þ Ø-Þ */ + 0x0256, 0x0257, 295, /* ɖ-ɗ Ɖ-Ɗ */ + 0x0258, 0x0259, 298, /* ɘ-ə Ǝ-Ə */ + 0x028a, 0x028b, 283, /* ʊ-ʋ Ʊ-Ʋ */ + 0x03ad, 0x03af, 463, /* έ-ί Έ-Ί */ + 0x03b1, 0x03c1, 468, /* α-ρ Α-Ρ */ + 0x03c3, 0x03cb, 468, /* σ-ϋ Σ-Ϋ */ + 0x03cd, 0x03ce, 437, /* ύ-ώ Ύ-Ώ */ + 0x0430, 0x044f, 468, /* а-я А-Я */ + 0x0451, 0x045c, 420, /* ё-ќ Ё-Ќ */ + 0x045e, 0x045f, 420, /* ў-џ Ў-Џ */ + 0x0561, 0x0586, 452, /* ա-ֆ Ա-Ֆ */ + 0x1f00, 0x1f07, 508, /* ἀ-ἇ Ἀ-Ἇ */ + 0x1f10, 0x1f15, 508, /* ἐ-ἕ Ἐ-Ἕ */ + 0x1f20, 0x1f27, 508, /* ἠ-ἧ Ἠ-Ἧ */ + 0x1f30, 0x1f37, 508, /* ἰ-ἷ Ἰ-Ἷ */ + 0x1f40, 0x1f45, 508, /* ὀ-ὅ Ὀ-Ὅ */ + 0x1f60, 0x1f67, 508, /* ὠ-ὧ Ὠ-Ὧ */ + 0x1f70, 0x1f71, 574, /* ὰ-ά Ὰ-Ά */ + 0x1f72, 0x1f75, 586, /* ὲ-ή Ὲ-Ή */ + 0x1f76, 0x1f77, 600, /* ὶ-ί Ὶ-Ί */ + 0x1f78, 0x1f79, 628, /* ὸ-ό Ὸ-Ό */ + 0x1f7a, 0x1f7b, 612, /* ὺ-ύ Ὺ-Ύ */ + 0x1f7c, 0x1f7d, 626, /* ὼ-ώ Ὼ-Ώ */ + 0x1f80, 0x1f87, 508, /* ᾀ-ᾇ ᾈ-ᾏ */ + 0x1f90, 0x1f97, 508, /* ᾐ-ᾗ ᾘ-ᾟ */ + 0x1fa0, 0x1fa7, 508, /* ᾠ-ᾧ ᾨ-ᾯ */ + 0x1fb0, 0x1fb1, 508, /* ᾰ-ᾱ Ᾰ-Ᾱ */ + 0x1fd0, 0x1fd1, 508, /* ῐ-ῑ Ῐ-Ῑ */ + 0x1fe0, 0x1fe1, 508, /* ῠ-ῡ Ῠ-Ῡ */ + 0x2170, 0x217f, 484, /* ⅰ-ⅿ Ⅰ-Ⅿ */ + 0x24d0, 0x24e9, 474, /* ⓐ-ⓩ Ⓐ-Ⓩ */ + 0xff41, 0xff5a, 468, /* a-z A-Z */ +}; + +/* + * lower case singlets + * 2nd col is conversion excess 500 + */ +static +Rune __toupper1[] = +{ + 0x00ff, 621, /* ÿ Ÿ */ + 0x0101, 499, /* ā Ā */ + 0x0103, 499, /* ă Ă */ + 0x0105, 499, /* ą Ą */ + 0x0107, 499, /* ć Ć */ + 0x0109, 499, /* ĉ Ĉ */ + 0x010b, 499, /* ċ Ċ */ + 0x010d, 499, /* č Č */ + 0x010f, 499, /* ď Ď */ + 0x0111, 499, /* đ Đ */ + 0x0113, 499, /* ē Ē */ + 0x0115, 499, /* ĕ Ĕ */ + 0x0117, 499, /* ė Ė */ + 0x0119, 499, /* ę Ę */ + 0x011b, 499, /* ě Ě */ + 0x011d, 499, /* ĝ Ĝ */ + 0x011f, 499, /* ğ Ğ */ + 0x0121, 
499, /* ġ Ġ */ + 0x0123, 499, /* ģ Ģ */ + 0x0125, 499, /* ĥ Ĥ */ + 0x0127, 499, /* ħ Ħ */ + 0x0129, 499, /* ĩ Ĩ */ + 0x012b, 499, /* ī Ī */ + 0x012d, 499, /* ĭ Ĭ */ + 0x012f, 499, /* į Į */ + 0x0131, 268, /* ı I */ + 0x0133, 499, /* ij IJ */ + 0x0135, 499, /* ĵ Ĵ */ + 0x0137, 499, /* ķ Ķ */ + 0x013a, 499, /* ĺ Ĺ */ + 0x013c, 499, /* ļ Ļ */ + 0x013e, 499, /* ľ Ľ */ + 0x0140, 499, /* ŀ Ŀ */ + 0x0142, 499, /* ł Ł */ + 0x0144, 499, /* ń Ń */ + 0x0146, 499, /* ņ Ņ */ + 0x0148, 499, /* ň Ň */ + 0x014b, 499, /* ŋ Ŋ */ + 0x014d, 499, /* ō Ō */ + 0x014f, 499, /* ŏ Ŏ */ + 0x0151, 499, /* ő Ő */ + 0x0153, 499, /* œ Œ */ + 0x0155, 499, /* ŕ Ŕ */ + 0x0157, 499, /* ŗ Ŗ */ + 0x0159, 499, /* ř Ř */ + 0x015b, 499, /* ś Ś */ + 0x015d, 499, /* ŝ Ŝ */ + 0x015f, 499, /* ş Ş */ + 0x0161, 499, /* š Š */ + 0x0163, 499, /* ţ Ţ */ + 0x0165, 499, /* ť Ť */ + 0x0167, 499, /* ŧ Ŧ */ + 0x0169, 499, /* ũ Ũ */ + 0x016b, 499, /* ū Ū */ + 0x016d, 499, /* ŭ Ŭ */ + 0x016f, 499, /* ů Ů */ + 0x0171, 499, /* ű Ű */ + 0x0173, 499, /* ų Ų */ + 0x0175, 499, /* ŵ Ŵ */ + 0x0177, 499, /* ŷ Ŷ */ + 0x017a, 499, /* ź Ź */ + 0x017c, 499, /* ż Ż */ + 0x017e, 499, /* ž Ž */ + 0x017f, 200, /* ſ S */ + 0x0183, 499, /* ƃ Ƃ */ + 0x0185, 499, /* ƅ Ƅ */ + 0x0188, 499, /* ƈ Ƈ */ + 0x018c, 499, /* ƌ Ƌ */ + 0x0192, 499, /* ƒ Ƒ */ + 0x0199, 499, /* ƙ Ƙ */ + 0x01a1, 499, /* ơ Ơ */ + 0x01a3, 499, /* ƣ Ƣ */ + 0x01a5, 499, /* ƥ Ƥ */ + 0x01a8, 499, /* ƨ Ƨ */ + 0x01ad, 499, /* ƭ Ƭ */ + 0x01b0, 499, /* ư Ư */ + 0x01b4, 499, /* ƴ Ƴ */ + 0x01b6, 499, /* ƶ Ƶ */ + 0x01b9, 499, /* ƹ Ƹ */ + 0x01bd, 499, /* ƽ Ƽ */ + 0x01c5, 499, /* Dž DŽ */ + 0x01c6, 498, /* dž DŽ */ + 0x01c8, 499, /* Lj LJ */ + 0x01c9, 498, /* lj LJ */ + 0x01cb, 499, /* Nj NJ */ + 0x01cc, 498, /* nj NJ */ + 0x01ce, 499, /* ǎ Ǎ */ + 0x01d0, 499, /* ǐ Ǐ */ + 0x01d2, 499, /* ǒ Ǒ */ + 0x01d4, 499, /* ǔ Ǔ */ + 0x01d6, 499, /* ǖ Ǖ */ + 0x01d8, 499, /* ǘ Ǘ */ + 0x01da, 499, /* ǚ Ǚ */ + 0x01dc, 499, /* ǜ Ǜ */ + 0x01df, 499, /* ǟ Ǟ */ + 0x01e1, 499, /* ǡ Ǡ */ + 0x01e3, 499, /* ǣ 
Ǣ */ + 0x01e5, 499, /* ǥ Ǥ */ + 0x01e7, 499, /* ǧ Ǧ */ + 0x01e9, 499, /* ǩ Ǩ */ + 0x01eb, 499, /* ǫ Ǫ */ + 0x01ed, 499, /* ǭ Ǭ */ + 0x01ef, 499, /* ǯ Ǯ */ + 0x01f2, 499, /* Dz DZ */ + 0x01f3, 498, /* dz DZ */ + 0x01f5, 499, /* ǵ Ǵ */ + 0x01fb, 499, /* ǻ Ǻ */ + 0x01fd, 499, /* ǽ Ǽ */ + 0x01ff, 499, /* ǿ Ǿ */ + 0x0201, 499, /* ȁ Ȁ */ + 0x0203, 499, /* ȃ Ȃ */ + 0x0205, 499, /* ȅ Ȅ */ + 0x0207, 499, /* ȇ Ȇ */ + 0x0209, 499, /* ȉ Ȉ */ + 0x020b, 499, /* ȋ Ȋ */ + 0x020d, 499, /* ȍ Ȍ */ + 0x020f, 499, /* ȏ Ȏ */ + 0x0211, 499, /* ȑ Ȑ */ + 0x0213, 499, /* ȓ Ȓ */ + 0x0215, 499, /* ȕ Ȕ */ + 0x0217, 499, /* ȗ Ȗ */ + 0x0253, 290, /* ɓ Ɓ */ + 0x0254, 294, /* ɔ Ɔ */ + 0x025b, 297, /* ɛ Ɛ */ + 0x0260, 295, /* ɠ Ɠ */ + 0x0263, 293, /* ɣ Ɣ */ + 0x0268, 291, /* ɨ Ɨ */ + 0x0269, 289, /* ɩ Ɩ */ + 0x026f, 289, /* ɯ Ɯ */ + 0x0272, 287, /* ɲ Ɲ */ + 0x0283, 282, /* ʃ Ʃ */ + 0x0288, 282, /* ʈ Ʈ */ + 0x0292, 281, /* ʒ Ʒ */ + 0x03ac, 462, /* ά Ά */ + 0x03cc, 436, /* ό Ό */ + 0x03d0, 438, /* ϐ Β */ + 0x03d1, 443, /* ϑ Θ */ + 0x03d5, 453, /* ϕ Φ */ + 0x03d6, 446, /* ϖ Π */ + 0x03e3, 499, /* ϣ Ϣ */ + 0x03e5, 499, /* ϥ Ϥ */ + 0x03e7, 499, /* ϧ Ϧ */ + 0x03e9, 499, /* ϩ Ϩ */ + 0x03eb, 499, /* ϫ Ϫ */ + 0x03ed, 499, /* ϭ Ϭ */ + 0x03ef, 499, /* ϯ Ϯ */ + 0x03f0, 414, /* ϰ Κ */ + 0x03f1, 420, /* ϱ Ρ */ + 0x0461, 499, /* ѡ Ѡ */ + 0x0463, 499, /* ѣ Ѣ */ + 0x0465, 499, /* ѥ Ѥ */ + 0x0467, 499, /* ѧ Ѧ */ + 0x0469, 499, /* ѩ Ѩ */ + 0x046b, 499, /* ѫ Ѫ */ + 0x046d, 499, /* ѭ Ѭ */ + 0x046f, 499, /* ѯ Ѯ */ + 0x0471, 499, /* ѱ Ѱ */ + 0x0473, 499, /* ѳ Ѳ */ + 0x0475, 499, /* ѵ Ѵ */ + 0x0477, 499, /* ѷ Ѷ */ + 0x0479, 499, /* ѹ Ѹ */ + 0x047b, 499, /* ѻ Ѻ */ + 0x047d, 499, /* ѽ Ѽ */ + 0x047f, 499, /* ѿ Ѿ */ + 0x0481, 499, /* ҁ Ҁ */ + 0x0491, 499, /* ґ Ґ */ + 0x0493, 499, /* ғ Ғ */ + 0x0495, 499, /* ҕ Ҕ */ + 0x0497, 499, /* җ Җ */ + 0x0499, 499, /* ҙ Ҙ */ + 0x049b, 499, /* қ Қ */ + 0x049d, 499, /* ҝ Ҝ */ + 0x049f, 499, /* ҟ Ҟ */ + 0x04a1, 499, /* ҡ Ҡ */ + 0x04a3, 499, /* ң Ң */ + 0x04a5, 499, /* ҥ Ҥ */ + 0x04a7, 499, 
/* ҧ Ҧ */ + 0x04a9, 499, /* ҩ Ҩ */ + 0x04ab, 499, /* ҫ Ҫ */ + 0x04ad, 499, /* ҭ Ҭ */ + 0x04af, 499, /* ү Ү */ + 0x04b1, 499, /* ұ Ұ */ + 0x04b3, 499, /* ҳ Ҳ */ + 0x04b5, 499, /* ҵ Ҵ */ + 0x04b7, 499, /* ҷ Ҷ */ + 0x04b9, 499, /* ҹ Ҹ */ + 0x04bb, 499, /* һ Һ */ + 0x04bd, 499, /* ҽ Ҽ */ + 0x04bf, 499, /* ҿ Ҿ */ + 0x04c2, 499, /* ӂ Ӂ */ + 0x04c4, 499, /* ӄ Ӄ */ + 0x04c8, 499, /* ӈ Ӈ */ + 0x04cc, 499, /* ӌ Ӌ */ + 0x04d1, 499, /* ӑ Ӑ */ + 0x04d3, 499, /* ӓ Ӓ */ + 0x04d5, 499, /* ӕ Ӕ */ + 0x04d7, 499, /* ӗ Ӗ */ + 0x04d9, 499, /* ә Ә */ + 0x04db, 499, /* ӛ Ӛ */ + 0x04dd, 499, /* ӝ Ӝ */ + 0x04df, 499, /* ӟ Ӟ */ + 0x04e1, 499, /* ӡ Ӡ */ + 0x04e3, 499, /* ӣ Ӣ */ + 0x04e5, 499, /* ӥ Ӥ */ + 0x04e7, 499, /* ӧ Ӧ */ + 0x04e9, 499, /* ө Ө */ + 0x04eb, 499, /* ӫ Ӫ */ + 0x04ef, 499, /* ӯ Ӯ */ + 0x04f1, 499, /* ӱ Ӱ */ + 0x04f3, 499, /* ӳ Ӳ */ + 0x04f5, 499, /* ӵ Ӵ */ + 0x04f9, 499, /* ӹ Ӹ */ + 0x1e01, 499, /* ḁ Ḁ */ + 0x1e03, 499, /* ḃ Ḃ */ + 0x1e05, 499, /* ḅ Ḅ */ + 0x1e07, 499, /* ḇ Ḇ */ + 0x1e09, 499, /* ḉ Ḉ */ + 0x1e0b, 499, /* ḋ Ḋ */ + 0x1e0d, 499, /* ḍ Ḍ */ + 0x1e0f, 499, /* ḏ Ḏ */ + 0x1e11, 499, /* ḑ Ḑ */ + 0x1e13, 499, /* ḓ Ḓ */ + 0x1e15, 499, /* ḕ Ḕ */ + 0x1e17, 499, /* ḗ Ḗ */ + 0x1e19, 499, /* ḙ Ḙ */ + 0x1e1b, 499, /* ḛ Ḛ */ + 0x1e1d, 499, /* ḝ Ḝ */ + 0x1e1f, 499, /* ḟ Ḟ */ + 0x1e21, 499, /* ḡ Ḡ */ + 0x1e23, 499, /* ḣ Ḣ */ + 0x1e25, 499, /* ḥ Ḥ */ + 0x1e27, 499, /* ḧ Ḧ */ + 0x1e29, 499, /* ḩ Ḩ */ + 0x1e2b, 499, /* ḫ Ḫ */ + 0x1e2d, 499, /* ḭ Ḭ */ + 0x1e2f, 499, /* ḯ Ḯ */ + 0x1e31, 499, /* ḱ Ḱ */ + 0x1e33, 499, /* ḳ Ḳ */ + 0x1e35, 499, /* ḵ Ḵ */ + 0x1e37, 499, /* ḷ Ḷ */ + 0x1e39, 499, /* ḹ Ḹ */ + 0x1e3b, 499, /* ḻ Ḻ */ + 0x1e3d, 499, /* ḽ Ḽ */ + 0x1e3f, 499, /* ḿ Ḿ */ + 0x1e41, 499, /* ṁ Ṁ */ + 0x1e43, 499, /* ṃ Ṃ */ + 0x1e45, 499, /* ṅ Ṅ */ + 0x1e47, 499, /* ṇ Ṇ */ + 0x1e49, 499, /* ṉ Ṉ */ + 0x1e4b, 499, /* ṋ Ṋ */ + 0x1e4d, 499, /* ṍ Ṍ */ + 0x1e4f, 499, /* ṏ Ṏ */ + 0x1e51, 499, /* ṑ Ṑ */ + 0x1e53, 499, /* ṓ Ṓ */ + 0x1e55, 499, /* ṕ Ṕ */ + 0x1e57, 499, /* ṗ Ṗ */ + 0x1e59, 499, 
/* ṙ Ṙ */ + 0x1e5b, 499, /* ṛ Ṛ */ + 0x1e5d, 499, /* ṝ Ṝ */ + 0x1e5f, 499, /* ṟ Ṟ */ + 0x1e61, 499, /* ṡ Ṡ */ + 0x1e63, 499, /* ṣ Ṣ */ + 0x1e65, 499, /* ṥ Ṥ */ + 0x1e67, 499, /* ṧ Ṧ */ + 0x1e69, 499, /* ṩ Ṩ */ + 0x1e6b, 499, /* ṫ Ṫ */ + 0x1e6d, 499, /* ṭ Ṭ */ + 0x1e6f, 499, /* ṯ Ṯ */ + 0x1e71, 499, /* ṱ Ṱ */ + 0x1e73, 499, /* ṳ Ṳ */ + 0x1e75, 499, /* ṵ Ṵ */ + 0x1e77, 499, /* ṷ Ṷ */ + 0x1e79, 499, /* ṹ Ṹ */ + 0x1e7b, 499, /* ṻ Ṻ */ + 0x1e7d, 499, /* ṽ Ṽ */ + 0x1e7f, 499, /* ṿ Ṿ */ + 0x1e81, 499, /* ẁ Ẁ */ + 0x1e83, 499, /* ẃ Ẃ */ + 0x1e85, 499, /* ẅ Ẅ */ + 0x1e87, 499, /* ẇ Ẇ */ + 0x1e89, 499, /* ẉ Ẉ */ + 0x1e8b, 499, /* ẋ Ẋ */ + 0x1e8d, 499, /* ẍ Ẍ */ + 0x1e8f, 499, /* ẏ Ẏ */ + 0x1e91, 499, /* ẑ Ẑ */ + 0x1e93, 499, /* ẓ Ẓ */ + 0x1e95, 499, /* ẕ Ẕ */ + 0x1ea1, 499, /* ạ Ạ */ + 0x1ea3, 499, /* ả Ả */ + 0x1ea5, 499, /* ấ Ấ */ + 0x1ea7, 499, /* ầ Ầ */ + 0x1ea9, 499, /* ẩ Ẩ */ + 0x1eab, 499, /* ẫ Ẫ */ + 0x1ead, 499, /* ậ Ậ */ + 0x1eaf, 499, /* ắ Ắ */ + 0x1eb1, 499, /* ằ Ằ */ + 0x1eb3, 499, /* ẳ Ẳ */ + 0x1eb5, 499, /* ẵ Ẵ */ + 0x1eb7, 499, /* ặ Ặ */ + 0x1eb9, 499, /* ẹ Ẹ */ + 0x1ebb, 499, /* ẻ Ẻ */ + 0x1ebd, 499, /* ẽ Ẽ */ + 0x1ebf, 499, /* ế Ế */ + 0x1ec1, 499, /* ề Ề */ + 0x1ec3, 499, /* ể Ể */ + 0x1ec5, 499, /* ễ Ễ */ + 0x1ec7, 499, /* ệ Ệ */ + 0x1ec9, 499, /* ỉ Ỉ */ + 0x1ecb, 499, /* ị Ị */ + 0x1ecd, 499, /* ọ Ọ */ + 0x1ecf, 499, /* ỏ Ỏ */ + 0x1ed1, 499, /* ố Ố */ + 0x1ed3, 499, /* ồ Ồ */ + 0x1ed5, 499, /* ổ Ổ */ + 0x1ed7, 499, /* ỗ Ỗ */ + 0x1ed9, 499, /* ộ Ộ */ + 0x1edb, 499, /* ớ Ớ */ + 0x1edd, 499, /* ờ Ờ */ + 0x1edf, 499, /* ở Ở */ + 0x1ee1, 499, /* ỡ Ỡ */ + 0x1ee3, 499, /* ợ Ợ */ + 0x1ee5, 499, /* ụ Ụ */ + 0x1ee7, 499, /* ủ Ủ */ + 0x1ee9, 499, /* ứ Ứ */ + 0x1eeb, 499, /* ừ Ừ */ + 0x1eed, 499, /* ử Ử */ + 0x1eef, 499, /* ữ Ữ */ + 0x1ef1, 499, /* ự Ự */ + 0x1ef3, 499, /* ỳ Ỳ */ + 0x1ef5, 499, /* ỵ Ỵ */ + 0x1ef7, 499, /* ỷ Ỷ */ + 0x1ef9, 499, /* ỹ Ỹ */ + 0x1f51, 508, /* ὑ Ὑ */ + 0x1f53, 508, /* ὓ Ὓ */ + 0x1f55, 508, /* ὕ Ὕ */ + 0x1f57, 508, /* ὗ Ὗ */ + 0x1fb3, 509, 
/* ᾳ ᾼ */ + 0x1fc3, 509, /* ῃ ῌ */ + 0x1fe5, 507, /* ῥ Ῥ */ + 0x1ff3, 509, /* ῳ ῼ */ +}; + +/* + * upper case ranges + * 3rd col is conversion excess 500 + */ +static +Rune __tolower2[] = +{ + 0x0041, 0x005a, 532, /* A-Z a-z */ + 0x00c0, 0x00d6, 532, /* À-Ö à-ö */ + 0x00d8, 0x00de, 532, /* Ø-Þ ø-þ */ + 0x0189, 0x018a, 705, /* Ɖ-Ɗ ɖ-ɗ */ + 0x018e, 0x018f, 702, /* Ǝ-Ə ɘ-ə */ + 0x01b1, 0x01b2, 717, /* Ʊ-Ʋ ʊ-ʋ */ + 0x0388, 0x038a, 537, /* Έ-Ί έ-ί */ + 0x038e, 0x038f, 563, /* Ύ-Ώ ύ-ώ */ + 0x0391, 0x03a1, 532, /* Α-Ρ α-ρ */ + 0x03a3, 0x03ab, 532, /* Σ-Ϋ σ-ϋ */ + 0x0401, 0x040c, 580, /* Ё-Ќ ё-ќ */ + 0x040e, 0x040f, 580, /* Ў-Џ ў-џ */ + 0x0410, 0x042f, 532, /* А-Я а-я */ + 0x0531, 0x0556, 548, /* Ա-Ֆ ա-ֆ */ + 0x10a0, 0x10c5, 548, /* Ⴀ-Ⴥ ა-ჵ */ + 0x1f08, 0x1f0f, 492, /* Ἀ-Ἇ ἀ-ἇ */ + 0x1f18, 0x1f1d, 492, /* Ἐ-Ἕ ἐ-ἕ */ + 0x1f28, 0x1f2f, 492, /* Ἠ-Ἧ ἠ-ἧ */ + 0x1f38, 0x1f3f, 492, /* Ἰ-Ἷ ἰ-ἷ */ + 0x1f48, 0x1f4d, 492, /* Ὀ-Ὅ ὀ-ὅ */ + 0x1f68, 0x1f6f, 492, /* Ὠ-Ὧ ὠ-ὧ */ + 0x1f88, 0x1f8f, 492, /* ᾈ-ᾏ ᾀ-ᾇ */ + 0x1f98, 0x1f9f, 492, /* ᾘ-ᾟ ᾐ-ᾗ */ + 0x1fa8, 0x1faf, 492, /* ᾨ-ᾯ ᾠ-ᾧ */ + 0x1fb8, 0x1fb9, 492, /* Ᾰ-Ᾱ ᾰ-ᾱ */ + 0x1fba, 0x1fbb, 426, /* Ὰ-Ά ὰ-ά */ + 0x1fc8, 0x1fcb, 414, /* Ὲ-Ή ὲ-ή */ + 0x1fd8, 0x1fd9, 492, /* Ῐ-Ῑ ῐ-ῑ */ + 0x1fda, 0x1fdb, 400, /* Ὶ-Ί ὶ-ί */ + 0x1fe8, 0x1fe9, 492, /* Ῠ-Ῡ ῠ-ῡ */ + 0x1fea, 0x1feb, 388, /* Ὺ-Ύ ὺ-ύ */ + 0x1ff8, 0x1ff9, 372, /* Ὸ-Ό ὸ-ό */ + 0x1ffa, 0x1ffb, 374, /* Ὼ-Ώ ὼ-ώ */ + 0x2160, 0x216f, 516, /* Ⅰ-Ⅿ ⅰ-ⅿ */ + 0x24b6, 0x24cf, 526, /* Ⓐ-Ⓩ ⓐ-ⓩ */ + 0xff21, 0xff3a, 532, /* A-Z a-z */ +}; + +/* + * upper case singlets + * 2nd col is conversion excess 500 + */ +static +Rune __tolower1[] = +{ + 0x0100, 501, /* Ā ā */ + 0x0102, 501, /* Ă ă */ + 0x0104, 501, /* Ą ą */ + 0x0106, 501, /* Ć ć */ + 0x0108, 501, /* Ĉ ĉ */ + 0x010a, 501, /* Ċ ċ */ + 0x010c, 501, /* Č č */ + 0x010e, 501, /* Ď ď */ + 0x0110, 501, /* Đ đ */ + 0x0112, 501, /* Ē ē */ + 0x0114, 501, /* Ĕ ĕ */ + 0x0116, 501, /* Ė ė */ + 0x0118, 501, /* Ę ę */ + 0x011a, 501, /* Ě ě */ + 0x011c, 501, /* 
Ĝ ĝ */ + 0x011e, 501, /* Ğ ğ */ + 0x0120, 501, /* Ġ ġ */ + 0x0122, 501, /* Ģ ģ */ + 0x0124, 501, /* Ĥ ĥ */ + 0x0126, 501, /* Ħ ħ */ + 0x0128, 501, /* Ĩ ĩ */ + 0x012a, 501, /* Ī ī */ + 0x012c, 501, /* Ĭ ĭ */ + 0x012e, 501, /* Į į */ + 0x0130, 301, /* İ i */ + 0x0132, 501, /* IJ ij */ + 0x0134, 501, /* Ĵ ĵ */ + 0x0136, 501, /* Ķ ķ */ + 0x0139, 501, /* Ĺ ĺ */ + 0x013b, 501, /* Ļ ļ */ + 0x013d, 501, /* Ľ ľ */ + 0x013f, 501, /* Ŀ ŀ */ + 0x0141, 501, /* Ł ł */ + 0x0143, 501, /* Ń ń */ + 0x0145, 501, /* Ņ ņ */ + 0x0147, 501, /* Ň ň */ + 0x014a, 501, /* Ŋ ŋ */ + 0x014c, 501, /* Ō ō */ + 0x014e, 501, /* Ŏ ŏ */ + 0x0150, 501, /* Ő ő */ + 0x0152, 501, /* Œ œ */ + 0x0154, 501, /* Ŕ ŕ */ + 0x0156, 501, /* Ŗ ŗ */ + 0x0158, 501, /* Ř ř */ + 0x015a, 501, /* Ś ś */ + 0x015c, 501, /* Ŝ ŝ */ + 0x015e, 501, /* Ş ş */ + 0x0160, 501, /* Š š */ + 0x0162, 501, /* Ţ ţ */ + 0x0164, 501, /* Ť ť */ + 0x0166, 501, /* Ŧ ŧ */ + 0x0168, 501, /* Ũ ũ */ + 0x016a, 501, /* Ū ū */ + 0x016c, 501, /* Ŭ ŭ */ + 0x016e, 501, /* Ů ů */ + 0x0170, 501, /* Ű ű */ + 0x0172, 501, /* Ų ų */ + 0x0174, 501, /* Ŵ ŵ */ + 0x0176, 501, /* Ŷ ŷ */ + 0x0178, 379, /* Ÿ ÿ */ + 0x0179, 501, /* Ź ź */ + 0x017b, 501, /* Ż ż */ + 0x017d, 501, /* Ž ž */ + 0x0181, 710, /* Ɓ ɓ */ + 0x0182, 501, /* Ƃ ƃ */ + 0x0184, 501, /* Ƅ ƅ */ + 0x0186, 706, /* Ɔ ɔ */ + 0x0187, 501, /* Ƈ ƈ */ + 0x018b, 501, /* Ƌ ƌ */ + 0x0190, 703, /* Ɛ ɛ */ + 0x0191, 501, /* Ƒ ƒ */ + 0x0193, 705, /* Ɠ ɠ */ + 0x0194, 707, /* Ɣ ɣ */ + 0x0196, 711, /* Ɩ ɩ */ + 0x0197, 709, /* Ɨ ɨ */ + 0x0198, 501, /* Ƙ ƙ */ + 0x019c, 711, /* Ɯ ɯ */ + 0x019d, 713, /* Ɲ ɲ */ + 0x01a0, 501, /* Ơ ơ */ + 0x01a2, 501, /* Ƣ ƣ */ + 0x01a4, 501, /* Ƥ ƥ */ + 0x01a7, 501, /* Ƨ ƨ */ + 0x01a9, 718, /* Ʃ ʃ */ + 0x01ac, 501, /* Ƭ ƭ */ + 0x01ae, 718, /* Ʈ ʈ */ + 0x01af, 501, /* Ư ư */ + 0x01b3, 501, /* Ƴ ƴ */ + 0x01b5, 501, /* Ƶ ƶ */ + 0x01b7, 719, /* Ʒ ʒ */ + 0x01b8, 501, /* Ƹ ƹ */ + 0x01bc, 501, /* Ƽ ƽ */ + 0x01c4, 502, /* DŽ dž */ + 0x01c5, 501, /* Dž dž */ + 0x01c7, 502, /* LJ lj */ + 0x01c8, 
501, /* Lj lj */ + 0x01ca, 502, /* NJ nj */ + 0x01cb, 501, /* Nj nj */ + 0x01cd, 501, /* Ǎ ǎ */ + 0x01cf, 501, /* Ǐ ǐ */ + 0x01d1, 501, /* Ǒ ǒ */ + 0x01d3, 501, /* Ǔ ǔ */ + 0x01d5, 501, /* Ǖ ǖ */ + 0x01d7, 501, /* Ǘ ǘ */ + 0x01d9, 501, /* Ǚ ǚ */ + 0x01db, 501, /* Ǜ ǜ */ + 0x01de, 501, /* Ǟ ǟ */ + 0x01e0, 501, /* Ǡ ǡ */ + 0x01e2, 501, /* Ǣ ǣ */ + 0x01e4, 501, /* Ǥ ǥ */ + 0x01e6, 501, /* Ǧ ǧ */ + 0x01e8, 501, /* Ǩ ǩ */ + 0x01ea, 501, /* Ǫ ǫ */ + 0x01ec, 501, /* Ǭ ǭ */ + 0x01ee, 501, /* Ǯ ǯ */ + 0x01f1, 502, /* DZ dz */ + 0x01f2, 501, /* Dz dz */ + 0x01f4, 501, /* Ǵ ǵ */ + 0x01fa, 501, /* Ǻ ǻ */ + 0x01fc, 501, /* Ǽ ǽ */ + 0x01fe, 501, /* Ǿ ǿ */ + 0x0200, 501, /* Ȁ ȁ */ + 0x0202, 501, /* Ȃ ȃ */ + 0x0204, 501, /* Ȅ ȅ */ + 0x0206, 501, /* Ȇ ȇ */ + 0x0208, 501, /* Ȉ ȉ */ + 0x020a, 501, /* Ȋ ȋ */ + 0x020c, 501, /* Ȍ ȍ */ + 0x020e, 501, /* Ȏ ȏ */ + 0x0210, 501, /* Ȑ ȑ */ + 0x0212, 501, /* Ȓ ȓ */ + 0x0214, 501, /* Ȕ ȕ */ + 0x0216, 501, /* Ȗ ȗ */ + 0x0386, 538, /* Ά ά */ + 0x038c, 564, /* Ό ό */ + 0x03e2, 501, /* Ϣ ϣ */ + 0x03e4, 501, /* Ϥ ϥ */ + 0x03e6, 501, /* Ϧ ϧ */ + 0x03e8, 501, /* Ϩ ϩ */ + 0x03ea, 501, /* Ϫ ϫ */ + 0x03ec, 501, /* Ϭ ϭ */ + 0x03ee, 501, /* Ϯ ϯ */ + 0x0460, 501, /* Ѡ ѡ */ + 0x0462, 501, /* Ѣ ѣ */ + 0x0464, 501, /* Ѥ ѥ */ + 0x0466, 501, /* Ѧ ѧ */ + 0x0468, 501, /* Ѩ ѩ */ + 0x046a, 501, /* Ѫ ѫ */ + 0x046c, 501, /* Ѭ ѭ */ + 0x046e, 501, /* Ѯ ѯ */ + 0x0470, 501, /* Ѱ ѱ */ + 0x0472, 501, /* Ѳ ѳ */ + 0x0474, 501, /* Ѵ ѵ */ + 0x0476, 501, /* Ѷ ѷ */ + 0x0478, 501, /* Ѹ ѹ */ + 0x047a, 501, /* Ѻ ѻ */ + 0x047c, 501, /* Ѽ ѽ */ + 0x047e, 501, /* Ѿ ѿ */ + 0x0480, 501, /* Ҁ ҁ */ + 0x0490, 501, /* Ґ ґ */ + 0x0492, 501, /* Ғ ғ */ + 0x0494, 501, /* Ҕ ҕ */ + 0x0496, 501, /* Җ җ */ + 0x0498, 501, /* Ҙ ҙ */ + 0x049a, 501, /* Қ қ */ + 0x049c, 501, /* Ҝ ҝ */ + 0x049e, 501, /* Ҟ ҟ */ + 0x04a0, 501, /* Ҡ ҡ */ + 0x04a2, 501, /* Ң ң */ + 0x04a4, 501, /* Ҥ ҥ */ + 0x04a6, 501, /* Ҧ ҧ */ + 0x04a8, 501, /* Ҩ ҩ */ + 0x04aa, 501, /* Ҫ ҫ */ + 0x04ac, 501, /* Ҭ ҭ */ + 0x04ae, 501, /* Ү ү */ 
+ 0x04b0, 501, /* Ұ ұ */ + 0x04b2, 501, /* Ҳ ҳ */ + 0x04b4, 501, /* Ҵ ҵ */ + 0x04b6, 501, /* Ҷ ҷ */ + 0x04b8, 501, /* Ҹ ҹ */ + 0x04ba, 501, /* Һ һ */ + 0x04bc, 501, /* Ҽ ҽ */ + 0x04be, 501, /* Ҿ ҿ */ + 0x04c1, 501, /* Ӂ ӂ */ + 0x04c3, 501, /* Ӄ ӄ */ + 0x04c7, 501, /* Ӈ ӈ */ + 0x04cb, 501, /* Ӌ ӌ */ + 0x04d0, 501, /* Ӑ ӑ */ + 0x04d2, 501, /* Ӓ ӓ */ + 0x04d4, 501, /* Ӕ ӕ */ + 0x04d6, 501, /* Ӗ ӗ */ + 0x04d8, 501, /* Ә ә */ + 0x04da, 501, /* Ӛ ӛ */ + 0x04dc, 501, /* Ӝ ӝ */ + 0x04de, 501, /* Ӟ ӟ */ + 0x04e0, 501, /* Ӡ ӡ */ + 0x04e2, 501, /* Ӣ ӣ */ + 0x04e4, 501, /* Ӥ ӥ */ + 0x04e6, 501, /* Ӧ ӧ */ + 0x04e8, 501, /* Ө ө */ + 0x04ea, 501, /* Ӫ ӫ */ + 0x04ee, 501, /* Ӯ ӯ */ + 0x04f0, 501, /* Ӱ ӱ */ + 0x04f2, 501, /* Ӳ ӳ */ + 0x04f4, 501, /* Ӵ ӵ */ + 0x04f8, 501, /* Ӹ ӹ */ + 0x1e00, 501, /* Ḁ ḁ */ + 0x1e02, 501, /* Ḃ ḃ */ + 0x1e04, 501, /* Ḅ ḅ */ + 0x1e06, 501, /* Ḇ ḇ */ + 0x1e08, 501, /* Ḉ ḉ */ + 0x1e0a, 501, /* Ḋ ḋ */ + 0x1e0c, 501, /* Ḍ ḍ */ + 0x1e0e, 501, /* Ḏ ḏ */ + 0x1e10, 501, /* Ḑ ḑ */ + 0x1e12, 501, /* Ḓ ḓ */ + 0x1e14, 501, /* Ḕ ḕ */ + 0x1e16, 501, /* Ḗ ḗ */ + 0x1e18, 501, /* Ḙ ḙ */ + 0x1e1a, 501, /* Ḛ ḛ */ + 0x1e1c, 501, /* Ḝ ḝ */ + 0x1e1e, 501, /* Ḟ ḟ */ + 0x1e20, 501, /* Ḡ ḡ */ + 0x1e22, 501, /* Ḣ ḣ */ + 0x1e24, 501, /* Ḥ ḥ */ + 0x1e26, 501, /* Ḧ ḧ */ + 0x1e28, 501, /* Ḩ ḩ */ + 0x1e2a, 501, /* Ḫ ḫ */ + 0x1e2c, 501, /* Ḭ ḭ */ + 0x1e2e, 501, /* Ḯ ḯ */ + 0x1e30, 501, /* Ḱ ḱ */ + 0x1e32, 501, /* Ḳ ḳ */ + 0x1e34, 501, /* Ḵ ḵ */ + 0x1e36, 501, /* Ḷ ḷ */ + 0x1e38, 501, /* Ḹ ḹ */ + 0x1e3a, 501, /* Ḻ ḻ */ + 0x1e3c, 501, /* Ḽ ḽ */ + 0x1e3e, 501, /* Ḿ ḿ */ + 0x1e40, 501, /* Ṁ ṁ */ + 0x1e42, 501, /* Ṃ ṃ */ + 0x1e44, 501, /* Ṅ ṅ */ + 0x1e46, 501, /* Ṇ ṇ */ + 0x1e48, 501, /* Ṉ ṉ */ + 0x1e4a, 501, /* Ṋ ṋ */ + 0x1e4c, 501, /* Ṍ ṍ */ + 0x1e4e, 501, /* Ṏ ṏ */ + 0x1e50, 501, /* Ṑ ṑ */ + 0x1e52, 501, /* Ṓ ṓ */ + 0x1e54, 501, /* Ṕ ṕ */ + 0x1e56, 501, /* Ṗ ṗ */ + 0x1e58, 501, /* Ṙ ṙ */ + 0x1e5a, 501, /* Ṛ ṛ */ + 0x1e5c, 501, /* Ṝ ṝ */ + 0x1e5e, 501, /* Ṟ ṟ */ + 0x1e60, 501, /* Ṡ ṡ */ 
+ 0x1e62, 501, /* Ṣ ṣ */ + 0x1e64, 501, /* Ṥ ṥ */ + 0x1e66, 501, /* Ṧ ṧ */ + 0x1e68, 501, /* Ṩ ṩ */ + 0x1e6a, 501, /* Ṫ ṫ */ + 0x1e6c, 501, /* Ṭ ṭ */ + 0x1e6e, 501, /* Ṯ ṯ */ + 0x1e70, 501, /* Ṱ ṱ */ + 0x1e72, 501, /* Ṳ ṳ */ + 0x1e74, 501, /* Ṵ ṵ */ + 0x1e76, 501, /* Ṷ ṷ */ + 0x1e78, 501, /* Ṹ ṹ */ + 0x1e7a, 501, /* Ṻ ṻ */ + 0x1e7c, 501, /* Ṽ ṽ */ + 0x1e7e, 501, /* Ṿ ṿ */ + 0x1e80, 501, /* Ẁ ẁ */ + 0x1e82, 501, /* Ẃ ẃ */ + 0x1e84, 501, /* Ẅ ẅ */ + 0x1e86, 501, /* Ẇ ẇ */ + 0x1e88, 501, /* Ẉ ẉ */ + 0x1e8a, 501, /* Ẋ ẋ */ + 0x1e8c, 501, /* Ẍ ẍ */ + 0x1e8e, 501, /* Ẏ ẏ */ + 0x1e90, 501, /* Ẑ ẑ */ + 0x1e92, 501, /* Ẓ ẓ */ + 0x1e94, 501, /* Ẕ ẕ */ + 0x1ea0, 501, /* Ạ ạ */ + 0x1ea2, 501, /* Ả ả */ + 0x1ea4, 501, /* Ấ ấ */ + 0x1ea6, 501, /* Ầ ầ */ + 0x1ea8, 501, /* Ẩ ẩ */ + 0x1eaa, 501, /* Ẫ ẫ */ + 0x1eac, 501, /* Ậ ậ */ + 0x1eae, 501, /* Ắ ắ */ + 0x1eb0, 501, /* Ằ ằ */ + 0x1eb2, 501, /* Ẳ ẳ */ + 0x1eb4, 501, /* Ẵ ẵ */ + 0x1eb6, 501, /* Ặ ặ */ + 0x1eb8, 501, /* Ẹ ẹ */ + 0x1eba, 501, /* Ẻ ẻ */ + 0x1ebc, 501, /* Ẽ ẽ */ + 0x1ebe, 501, /* Ế ế */ + 0x1ec0, 501, /* Ề ề */ + 0x1ec2, 501, /* Ể ể */ + 0x1ec4, 501, /* Ễ ễ */ + 0x1ec6, 501, /* Ệ ệ */ + 0x1ec8, 501, /* Ỉ ỉ */ + 0x1eca, 501, /* Ị ị */ + 0x1ecc, 501, /* Ọ ọ */ + 0x1ece, 501, /* Ỏ ỏ */ + 0x1ed0, 501, /* Ố ố */ + 0x1ed2, 501, /* Ồ ồ */ + 0x1ed4, 501, /* Ổ ổ */ + 0x1ed6, 501, /* Ỗ ỗ */ + 0x1ed8, 501, /* Ộ ộ */ + 0x1eda, 501, /* Ớ ớ */ + 0x1edc, 501, /* Ờ ờ */ + 0x1ede, 501, /* Ở ở */ + 0x1ee0, 501, /* Ỡ ỡ */ + 0x1ee2, 501, /* Ợ ợ */ + 0x1ee4, 501, /* Ụ ụ */ + 0x1ee6, 501, /* Ủ ủ */ + 0x1ee8, 501, /* Ứ ứ */ + 0x1eea, 501, /* Ừ ừ */ + 0x1eec, 501, /* Ử ử */ + 0x1eee, 501, /* Ữ ữ */ + 0x1ef0, 501, /* Ự ự */ + 0x1ef2, 501, /* Ỳ ỳ */ + 0x1ef4, 501, /* Ỵ ỵ */ + 0x1ef6, 501, /* Ỷ ỷ */ + 0x1ef8, 501, /* Ỹ ỹ */ + 0x1f59, 492, /* Ὑ ὑ */ + 0x1f5b, 492, /* Ὓ ὓ */ + 0x1f5d, 492, /* Ὕ ὕ */ + 0x1f5f, 492, /* Ὗ ὗ */ + 0x1fbc, 491, /* ᾼ ᾳ */ + 0x1fcc, 491, /* ῌ ῃ */ + 0x1fec, 493, /* Ῥ ῥ */ + 0x1ffc, 491, /* ῼ ῳ */ +}; + +/* + * title 
characters are those between + * upper and lower case. ie DZ Dz dz + */ +static +Rune __totitle1[] = +{ + 0x01c4, 501, /* DŽ Dž */ + 0x01c6, 499, /* dž Dž */ + 0x01c7, 501, /* LJ Lj */ + 0x01c9, 499, /* lj Lj */ + 0x01ca, 501, /* NJ Nj */ + 0x01cc, 499, /* nj Nj */ + 0x01f1, 501, /* DZ Dz */ + 0x01f3, 499, /* dz Dz */ +}; + +static Rune* +bsearch(Rune c, Rune *t, int n, int ne) +{ + Rune *p; + int m; + + while(n > 1) { + m = n/2; + p = t + m*ne; + if(c >= p[0]) { + t = p; + n = n-m; + } else + n = m; + } + if(n && c >= t[0]) + return t; + return 0; +} + +Rune +tolowerrune(Rune c) +{ + Rune *p; + + p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return c + p[2] - 500; + p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2); + if(p && c == p[0]) + return c + p[1] - 500; + return c; +} + +Rune +toupperrune(Rune c) +{ + Rune *p; + + p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return c + p[2] - 500; + p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2); + if(p && c == p[0]) + return c + p[1] - 500; + return c; +} + +Rune +totitlerune(Rune c) +{ + Rune *p; + + p = bsearch(c, __totitle1, nelem(__totitle1)/2, 2); + if(p && c == p[0]) + return c + p[1] - 500; + return c; +} + +int +islowerrune(Rune c) +{ + Rune *p; + + p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return 1; + p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2); + if(p && c == p[0]) + return 1; + return 0; +} + +int +isupperrune(Rune c) +{ + Rune *p; + + p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return 1; + p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2); + if(p && c == p[0]) + return 1; + return 0; +} + +int +isalpharune(Rune c) +{ + Rune *p; + + if(isupperrune(c) || islowerrune(c)) + return 1; + p = bsearch(c, __alpha2, nelem(__alpha2)/2, 2); + if(p && c >= p[0] && c <= p[1]) + return 1; + p = bsearch(c, __alpha1, nelem(__alpha1), 1); + 
if(p && c == p[0]) + return 1; + return 0; +} + +int +istitlerune(Rune c) +{ + return isupperrune(c) && islowerrune(c); +} + +int +isspacerune(Rune c) +{ + Rune *p; + + p = bsearch(c, __space2, nelem(__space2)/2, 2); + if(p && c >= p[0] && c <= p[1]) + return 1; + return 0; +} diff --git a/mk/libutf/utf.7 b/mk/libutf/utf.7 @@ -0,0 +1,99 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH UTF 7 +.SH NAME +UTF, Unicode, ASCII, rune \- character set and format +.SH DESCRIPTION +The Plan 9 character set and representation are +based on the Unicode Standard and on the ISO multibyte +.SM UTF-8 +encoding (Universal Character +Set Transformation Format, 8 bits wide). +The Unicode Standard represents its characters in 16 +bits; +.SM UTF-8 +represents such +values in an 8-bit byte stream. +Throughout this manual, +.SM UTF-8 +is shortened to +.SM UTF. +.PP +In Plan 9, a +.I rune +is a 16-bit quantity representing a Unicode character. +Internally, programs may store characters as runes. +However, any external manifestation of textual information, +in files or at the interface between programs, uses a +machine-independent, byte-stream encoding called +.SM UTF. +.PP +.SM UTF +is designed so the 7-bit +.SM ASCII +set (values hexadecimal 00 to 7F), +appear only as themselves +in the encoding. +Runes with values above 7F appear as sequences of two or more +bytes with values only from 80 to FF. +.PP +The +.SM UTF +encoding of the Unicode Standard is backward compatible with +.SM ASCII\c +: +programs presented only with +.SM ASCII +work on Plan 9 +even if not written to deal with +.SM UTF, +as do +programs that deal with uninterpreted byte streams. +However, programs that perform semantic processing on +.SM ASCII +graphic +characters must convert from +.SM UTF +to runes +in order to work properly with non-\c +.SM ASCII +input. +See +.IR rune (3). +.PP +Letting numbers be binary, +a rune x is converted to a multibyte +.SM UTF +sequence +as follows: +.PP +01. 
x in [00000000.0bbbbbbb] → 0bbbbbbb +.br +10. x in [00000bbb.bbbbbbbb] → 110bbbbb, 10bbbbbb +.br +11. x in [bbbbbbbb.bbbbbbbb] → 1110bbbb, 10bbbbbb, 10bbbbbb +.br +.PP +Conversion 01 provides a one-byte sequence that spans the +.SM ASCII +character set in a compatible way. +Conversions 10 and 11 represent higher-valued characters +as sequences of two or three bytes with the high bit set. +Plan 9 does not support the 4, 5, and 6 byte sequences proposed by X-Open. +When there are multiple ways to encode a value, for example rune 0, +the shortest encoding is used. +.PP +In the inverse mapping, +any sequence except those described above +is incorrect and is converted to rune hexadecimal 0080. +.SH "SEE ALSO" +.IR ascii (1), +.IR tcs (1), +.IR rune (3), +.IR "The Unicode Standard" . diff --git a/mk/libutf/utf.h b/mk/libutf/utf.h @@ -0,0 +1,54 @@ +#ifndef _UTF_H_ +#define _UTF_H_ 1 +#if defined(__cplusplus) +extern "C" { +#endif + +typedef unsigned int Rune; /* 32 bits */ + +enum +{ + UTFmax = 4, /* maximum bytes per rune */ + Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ + Runeself = 0x80, /* rune and UTF sequences are the same (<) */ + Runeerror = 0xFFFD, /* decoding error in UTF */ + Runemax = 0x10FFFF /* maximum rune value */ +}; + +/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */ +int chartorune(Rune *rune, char *str); +int fullrune(char *str, int n); +int isalpharune(Rune c); +int islowerrune(Rune c); +int isspacerune(Rune c); +int istitlerune(Rune c); +int isupperrune(Rune c); +int runelen(long c); +int runenlen(Rune *r, int nrune); +Rune* runestrcat(Rune *s1, Rune *s2); +Rune* runestrchr(Rune *s, Rune c); +int runestrcmp(Rune *s1, Rune *s2); +Rune* runestrcpy(Rune *s1, Rune *s2); +Rune* runestrdup(Rune *s) ; +Rune* runestrecpy(Rune *s1, Rune *es1, Rune *s2); +long runestrlen(Rune *s); +Rune* runestrncat(Rune *s1, Rune *s2, long n); +int runestrncmp(Rune *s1, Rune *s2, long n); +Rune* runestrncpy(Rune *s1, Rune 
*s2, long n); +Rune* runestrrchr(Rune *s, Rune c); +Rune* runestrstr(Rune *s1, Rune *s2); +int runetochar(char *str, Rune *rune); +Rune tolowerrune(Rune c); +Rune totitlerune(Rune c); +Rune toupperrune(Rune c); +char* utfecpy(char *to, char *e, char *from); +int utflen(char *s); +int utfnlen(char *s, long m); +char* utfrrune(char *s, long c); +char* utfrune(char *s, long c); +char* utfutf(char *s1, char *s2); + +#if defined(__cplusplus) +} +#endif +#endif diff --git a/mk/libutf/utfdef.h b/mk/libutf/utfdef.h @@ -0,0 +1,33 @@ +/* + * compiler directive on Plan 9 + */ +#ifndef USED +#define USED(x) if(x);else +#endif + +/* + * easiest way to make sure these are defined + */ +#define uchar _fmtuchar +#define ushort _fmtushort +#define uint _fmtuint +#define ulong _fmtulong +#define vlong _fmtvlong +#define uvlong _fmtuvlong +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; +typedef unsigned long long uvlong; +typedef long long vlong; + +/* + * nil cannot be ((void*)0) on ANSI C, + * because it is used for function pointers + */ +#undef nil +#define nil 0 + +#undef nelem +#define nelem ((void*)0) + diff --git a/mk/libutf/utfecpy.c b/mk/libutf/utfecpy.c @@ -0,0 +1,37 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#define _BSD_SOURCE 1 /* memccpy */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +char* +utfecpy(char *to, char *e, char *from) +{ + char *end; + + if(to >= e) + return to; + end = memccpy(to, from, '\0', e - to); + if(end == nil){ + end = e-1; + while(end>to && (*--end&0xC0)==0x80) + ; + *end = '\0'; + }else{ + end--; + } + return end; +} diff --git a/mk/libutf/utflen.c b/mk/libutf/utflen.c @@ -0,0 +1,37 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +int +utflen(char *s) +{ + int c; + long n; + Rune rune; + + n = 0; + for(;;) { + c = *(uchar*)s; + if(c < Runeself) { + if(c == 0) + return n; + s++; + } else + s += chartorune(&rune, s); + n++; + } +} diff --git a/mk/libutf/utfnlen.c b/mk/libutf/utfnlen.c @@ -0,0 +1,41 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +int +utfnlen(char *s, long m) +{ + int c; + long n; + Rune rune; + char *es; + + es = s + m; + for(n = 0; s < es; n++) { + c = *(uchar*)s; + if(c < Runeself){ + if(c == '\0') + break; + s++; + continue; + } + if(!fullrune(s, es-s)) + break; + s += chartorune(&rune, s); + } + return n; +} diff --git a/mk/libutf/utfrrune.c b/mk/libutf/utfrrune.c @@ -0,0 +1,45 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +char* +utfrrune(char *s, long c) +{ + long c1; + Rune r; + char *s1; + + if(c < Runesync) /* not part of utf sequence */ + return strrchr(s, c); + + s1 = 0; + for(;;) { + c1 = *(uchar*)s; + if(c1 < Runeself) { /* one byte rune */ + if(c1 == 0) + return s1; + if(c1 == c) + s1 = s; + s++; + continue; + } + c1 = chartorune(&r, s); + if(r == c) + s1 = s; + s += c1; + } +} diff --git a/mk/libutf/utfrune.c b/mk/libutf/utfrune.c @@ -0,0 +1,44 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +char* +utfrune(char *s, long c) +{ + long c1; + Rune r; + int n; + + if(c < Runesync) /* not part of utf sequence */ + return strchr(s, c); + + for(;;) { + c1 = *(uchar*)s; + if(c1 < Runeself) { /* one byte rune */ + if(c1 == 0) + return 0; + if(c1 == c) + return s; + s++; + continue; + } + n = chartorune(&r, s); + if(r == c) + return s; + s += n; + } +} diff --git a/mk/libutf/utfutf.c b/mk/libutf/utfutf.c @@ -0,0 +1,41 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + + +/* + * Return pointer to first occurrence of s2 in s1, + * 0 if none + */ +char* +utfutf(char *s1, char *s2) +{ + char *p; + long f, n1, n2; + Rune r; + + n1 = chartorune(&r, s2); + f = r; + if(f <= Runesync) /* represents self */ + return strstr(s1, s2); + + n2 = strlen(s2); + for(p=s1; p=utfrune(p, f); p+=n1) + if(strncmp(p, s2, n2) == 0) + return p; + return 0; +} diff --git a/mk/mk/NOTICE b/mk/mk/NOTICE @@ -0,0 +1,34 @@ +This copyright NOTICE applies to all files in this directory and +subdirectories, unless another copyright notice appears in a given +file or subdirectory. If you take substantial code from this software to use in +other programs, you must somehow include with it an appropriate +copyright notice that includes the copyright notice and the other +notices below. It is fine (and often tidier) to do that in a separate +file such as NOTICE, LICENCE or COPYING. + + Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. + Revisions Copyright © 2000-2003 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +---- + +This software is also made available under the Lucent Public License +version 1.02; see http://plan9.bell-labs.com/plan9dist/license.html + diff --git a/mk/mk/README b/mk/mk/README @@ -0,0 +1,5 @@ +This software was packaged for Unix by Russ Cox. +Please send comments to rsc@swtch.com. 
+ +http://swtch.com/plan9port/unix + diff --git a/mk/mk/arc.c b/mk/mk/arc.c @@ -0,0 +1,52 @@ +#include "mk.h" + +Arc * +newarc(Node *n, Rule *r, char *stem, Resub *match) +{ + Arc *a; + + a = (Arc *)Malloc(sizeof(Arc)); + a->n = n; + a->r = r; + a->stem = strdup(stem); + rcopy(a->match, match, NREGEXP); + a->next = 0; + a->flag = 0; + a->prog = r->prog; + return(a); +} + +void +dumpa(char *s, Arc *a) +{ + char buf[1024]; + + Bprint(&bout, "%sArc@%p: n=%p r=%p flag=0x%x stem='%s'", + s, a, a->n, a->r, a->flag, a->stem); + if(a->prog) + Bprint(&bout, " prog='%s'", a->prog); + Bprint(&bout, "\n"); + + if(a->n){ + snprint(buf, sizeof(buf), "%s ", (*s == ' ')? s:""); + dumpn(buf, a->n); + } +} + +void +nrep(void) +{ + Symtab *sym; + Word *w; + + sym = symlook("NREP", S_VAR, 0); + if(sym){ + w = sym->u.ptr; + if (w && w->s && *w->s) + nreps = atoi(w->s); + } + if(nreps < 1) + nreps = 1; + if(DEBUG(D_GRAPH)) + Bprint(&bout, "nreps = %d\n", nreps); +} diff --git a/mk/mk/archive.c b/mk/mk/archive.c @@ -0,0 +1,253 @@ +#include "mk.h" +#define ARMAG "!<arch>\n" +#define SARMAG 8 + +#define ARFMAG "`\n" +#define SARNAME 16 + +struct ar_hdr +{ + char name[SARNAME]; + char date[12]; + char uid[6]; + char gid[6]; + char mode[8]; + char size[10]; + char fmag[2]; +}; +#define SAR_HDR (SARNAME+44) + +static int dolong = 1; + +static void atimes(char *); +static char *split(char*, char**); + +long +readn(int f, void *av, long n) +{ + char *a; + long m, t; + + a = av; + t = 0; + while(t < n){ + m = read(f, a+t, n-t); + if(m <= 0){ + if(t == 0) + return m; + break; + } + t += m; + } + return t; +} +long +atimeof(int force, char *name) +{ + Symtab *sym; + long t; + char *archive, *member, buf[512]; + + archive = split(name, &member); + if(archive == 0) + Exit(); + + t = mtime(archive); + sym = symlook(archive, S_AGG, 0); + if(sym){ + if(force || (t > sym->u.value)){ + atimes(archive); + sym->u.value = t; + } + } + else{ + atimes(archive); + /* mark the aggegate as having been done */ + 
symlook(strdup(archive), S_AGG, "")->u.value = t; + } + /* truncate long member name to sizeof of name field in archive header */ + if(dolong) + snprint(buf, sizeof(buf), "%s(%s)", archive, member); + else + snprint(buf, sizeof(buf), "%s(%.*s)", archive, SARNAME, member); + sym = symlook(buf, S_TIME, 0); + if (sym) + return sym->u.value; + return 0; +} + +void +atouch(char *name) +{ + char *archive, *member; + int fd, i; + struct ar_hdr h; + long t; + + archive = split(name, &member); + if(archive == 0) + Exit(); + + fd = open(archive, ORDWR); + if(fd < 0){ + fd = create(archive, OWRITE, 0666); + if(fd < 0){ + fprint(2, "create %s: %r\n", archive); + Exit(); + } + write(fd, ARMAG, SARMAG); + } + if(symlook(name, S_TIME, 0)){ + /* hoon off and change it in situ */ + LSEEK(fd, SARMAG, 0); + while(read(fd, (char *)&h, sizeof(h)) == sizeof(h)){ + for(i = SARNAME-1; i > 0 && h.name[i] == ' '; i--) + ; + h.name[i+1]=0; + if(strcmp(member, h.name) == 0){ + t = SARNAME-sizeof(h); /* ughgghh */ + LSEEK(fd, t, 1); + fprint(fd, "%-12ld", time(0)); + break; + } + t = atol(h.size); + if(t&01) t++; + LSEEK(fd, t, 1); + } + } + close(fd); +} + +static void +atimes(char *ar) +{ + struct ar_hdr h; + long t; + int fd, i, namelen; + char buf[2048], *p, *strings; + char name[1024]; + Symtab *sym; + + strings = nil; + fd = open(ar, OREAD); + if(fd < 0) + return; + + if(read(fd, buf, SARMAG) != SARMAG){ + close(fd); + return; + } + while(readn(fd, (char *)&h, sizeof(h)) == sizeof(h)){ + t = atol(h.date); + if(t == 0) /* as it sometimes happens; thanks ken */ + t = 1; + namelen = 0; + if(memcmp(h.name, "#1/", 3) == 0){ /* BSD */ + namelen = atoi(h.name+3); + if(namelen >= sizeof name){ + namelen = 0; + goto skip; + } + if(readn(fd, name, namelen) != namelen) + break; + name[namelen] = 0; + }else if(memcmp(h.name, "// ", 2) == 0){ /* GNU */ + /* date, uid, gid, mode all ' ' */ + for(i=2; i<16+12+6+6+8; i++) + if(h.name[i] != ' ') + goto skip; + t = atol(h.size); + if(t&01) + t++; + 
free(strings); + strings = malloc(t+1); + if(strings){ + if(readn(fd, strings, t) != t){ + free(strings); + strings = nil; + break; + } + strings[t] = 0; + continue; + } + goto skip; + }else if(strings && h.name[0]=='/' && isdigit((uchar)h.name[1])){ + i = strtol(h.name+1, &p, 10); + if(*p != ' ' || i >= strlen(strings)) + goto skip; + p = strings+i; + for(; *p && *p != '/'; p++) + ; + namelen = p-(strings+i); + if(namelen >= sizeof name){ + namelen = 0; + goto skip; + } + memmove(name, strings+i, namelen); + name[namelen] = 0; + namelen = 0; + }else{ + strncpy(name, h.name, sizeof(h.name)); + for(i = sizeof(h.name)-1; i > 0 && name[i] == ' '; i--) + ; + if(name[i] == '/') /* system V bug */ + i--; + name[i+1]=0; + } + snprint(buf, sizeof buf, "%s(%s)", ar, name); + sym = symlook(strdup(buf), S_TIME, (void *)t); + sym->u.value = t; + skip: + t = atol(h.size); + if(t&01) t++; + t -= namelen; + LSEEK(fd, t, 1); + } + close(fd); + free(strings); +} + +static int +type(char *file) +{ + int fd; + char buf[SARMAG]; + + fd = open(file, OREAD); + if(fd < 0){ + if(symlook(file, S_BITCH, 0) == 0){ + if(strlen(file) < 2 || strcmp(file+strlen(file)-2, ".a") != 0) + Bprint(&bout, "%s doesn't exist: assuming it will be an archive\n", file); + symlook(file, S_BITCH, (void *)file); + } + return 1; + } + if(read(fd, buf, SARMAG) != SARMAG){ + close(fd); + return 0; + } + close(fd); + return !strncmp(ARMAG, buf, SARMAG); +} + +static char* +split(char *name, char **member) +{ + char *p, *q; + + p = strdup(name); + q = utfrune(p, '('); + if(q){ + *q++ = 0; + if(member) + *member = q; + q = utfrune(q, ')'); + if (q) + *q = 0; + if(type(p)) + return p; + free(p); + fprint(2, "mk: '%s' is not an archive\n", name); + } + return 0; +} diff --git a/mk/mk/bufblock.c b/mk/mk/bufblock.c @@ -0,0 +1,88 @@ +#include "mk.h" + +static Bufblock *freelist; +#define QUANTA 4096 + +Bufblock * +newbuf(void) +{ + Bufblock *p; + + if (freelist) { + p = freelist; + freelist = freelist->next; + } else { + 
p = (Bufblock *) Malloc(sizeof(Bufblock)); + p->start = Malloc(QUANTA*sizeof(*p->start)); + p->end = p->start+QUANTA; + } + p->current = p->start; + *p->start = 0; + p->next = 0; + return p; +} + +void +freebuf(Bufblock *p) +{ + p->next = freelist; + freelist = p; +} + +void +growbuf(Bufblock *p) +{ + int n; + Bufblock *f; + char *cp; + + n = p->end-p->start+QUANTA; + /* search the free list for a big buffer */ + for (f = freelist; f; f = f->next) { + if (f->end-f->start >= n) { + memcpy(f->start, p->start, p->end-p->start); + cp = f->start; + f->start = p->start; + p->start = cp; + cp = f->end; + f->end = p->end; + p->end = cp; + f->current = f->start; + break; + } + } + if (!f) { /* not found - grow it */ + p->start = Realloc(p->start, n); + p->end = p->start+n; + } + p->current = p->start+n-QUANTA; +} + +void +bufcpy(Bufblock *buf, char *cp, int n) +{ + + while (n--) + insert(buf, *cp++); +} + +void +insert(Bufblock *buf, int c) +{ + + if (buf->current >= buf->end) + growbuf(buf); + *buf->current++ = c; +} + +void +rinsert(Bufblock *buf, Rune r) +{ + int n; + + n = runelen(r); + if (buf->current+n > buf->end) + growbuf(buf); + runetochar(buf->current, &r); + buf->current += n; +} diff --git a/mk/mk/env.c b/mk/mk/env.c @@ -0,0 +1,149 @@ +#include "mk.h" + +enum { + ENVQUANTA=10 +}; + +Envy *envy; +static int nextv; + +static char *myenv[] = +{ + "target", + "stem", + "prereq", + "pid", + "nproc", + "newprereq", + "alltarget", + "newmember", + "stem0", /* must be in order from here */ + "stem1", + "stem2", + "stem3", + "stem4", + "stem5", + "stem6", + "stem7", + "stem8", + "stem9", + 0 +}; + +void +initenv(void) +{ + char **p; + + for(p = myenv; *p; p++) + symlook(*p, S_INTERNAL, (void *)""); + readenv(); /* o.s. 
dependent */ +} + +static void +envinsert(char *name, Word *value) +{ + static int envsize; + + if (nextv >= envsize) { + envsize += ENVQUANTA; + envy = (Envy *) Realloc((char *) envy, envsize*sizeof(Envy)); + } + envy[nextv].name = name; + envy[nextv++].values = value; +} + +static void +envupd(char *name, Word *value) +{ + Envy *e; + + for(e = envy; e->name; e++) + if(strcmp(name, e->name) == 0){ + delword(e->values); + e->values = value; + return; + } + e->name = name; + e->values = value; + envinsert(0,0); +} + +static void +ecopy(Symtab *s) +{ + char **p; + + if(symlook(s->name, S_NOEXPORT, 0)) + return; + for(p = myenv; *p; p++) + if(strcmp(*p, s->name) == 0) + return; + envinsert(s->name, s->u.ptr); +} + +void +execinit(void) +{ + char **p; + + nextv = 0; + for(p = myenv; *p; p++) + envinsert(*p, stow("")); + + symtraverse(S_VAR, ecopy); + envinsert(0, 0); +} + +Envy* +buildenv(Job *j, int slot) +{ + char **p, *cp, *qp; + Word *w, *v, **l; + int i; + char buf[256]; + + envupd("target", wdup(j->t)); + if(j->r->attr&REGEXP) + envupd("stem",newword("")); + else + envupd("stem", newword(j->stem)); + envupd("prereq", wdup(j->p)); + sprint(buf, "%d", getpid()); + envupd("pid", newword(buf)); + sprint(buf, "%d", slot); + envupd("nproc", newword(buf)); + envupd("newprereq", wdup(j->np)); + envupd("alltarget", wdup(j->at)); + l = &v; + v = w = wdup(j->np); + while(w){ + cp = strchr(w->s, '('); + if(cp){ + qp = strchr(cp+1, ')'); + if(qp){ + *qp = 0; + strcpy(w->s, cp+1); + l = &w->next; + w = w->next; + continue; + } + } + *l = w->next; + free(w->s); + free(w); + w = *l; + } + envupd("newmember", v); + /* update stem0 -> stem9 */ + for(p = myenv; *p; p++) + if(strcmp(*p, "stem0") == 0) + break; + for(i = 0; *p; i++, p++){ + if((j->r->attr&REGEXP) && j->match[i]) + envupd(*p, newword(j->match[i])); + else + envupd(*p, newword("")); + } + return envy; +} diff --git a/mk/mk/file.c b/mk/mk/file.c @@ -0,0 +1,90 @@ +#include "mk.h" + +/* table-driven version in bootes dump 
of 12/31/96 */ + +long +mtime(char *name) +{ + return mkmtime(name); +} + +long +timeof(char *name, int force) +{ + Symtab *sym; + long t; + + if(utfrune(name, '(')) + return atimeof(force, name); /* archive */ + + if(force) + return mtime(name); + + + sym = symlook(name, S_TIME, 0); + if (sym) + return sym->u.value; + + t = mtime(name); + if(t == 0) + return 0; + + symlook(name, S_TIME, (void*)t); /* install time in cache */ + return t; +} + +void +touch(char *name) +{ + Bprint(&bout, "touch(%s)\n", name); + if(nflag) + return; + + if(utfrune(name, '(')) + atouch(name); /* archive */ + else if(chgtime(name) < 0) { + fprint(2, "%s: %r\n", name); + Exit(); + } +} + +void +delete(char *name) +{ + if(utfrune(name, '(') == 0) { /* file */ + if(remove(name) < 0) + fprint(2, "remove %s: %r\n", name); + } else + fprint(2, "hoon off; mk can'tdelete archive members\n"); +} + +void +timeinit(char *s) +{ + long t; + char *cp; + Rune r; + int c, n; + + t = time(0); + while (*s) { + cp = s; + do{ + n = chartorune(&r, s); + if (r == ' ' || r == ',' || r == '\n') + break; + s += n; + } while(*s); + c = *s; + *s = 0; + symlook(strdup(cp), S_TIME, (void *)t)->u.value = t; + if (c) + *s++ = c; + while(*s){ + n = chartorune(&r, s); + if(r != ' ' && r != ',' && r != '\n') + break; + s += n; + } + } +} diff --git a/mk/mk/fns.h b/mk/mk/fns.h @@ -0,0 +1,88 @@ +#undef waitfor +#define waitfor mkwaitfor + +void addrule(char*, Word*, char*, Word*, int, int, char*); +void addrules(Word*, Word*, char*, int, int, char*); +void addw(Word*, char*); +void assert(char*, int); +int assline(Biobuf *, Bufblock *); +long atimeof(int,char*); +void atouch(char*); +void bufcpy(Bufblock *, char *, int); +Envy *buildenv(Job*, int); +void catchnotes(void); +int chgtime(char*); +void clrmade(Node*); +void delete(char*); +void delword(Word*); +int dorecipe(Node*); +void dumpa(char*, Arc*); +void dumpj(char*, Job*, int); +void dumpn(char*, Node*); +void dumpr(char*, Rule*); +void dumpv(char*); +void 
dumpw(char*, Word*); +void execinit(void); +int execsh(char*, char*, Bufblock*, Envy*, Shell*, Word*); +void Exit(void); +void expunge(int, char*); +void freebuf(Bufblock*); +void front(char*); +Node *graph(char*); +void growbuf(Bufblock *); +void initenv(void); +void initshell(void); +void insert(Bufblock *, int); +void ipop(void); +void ipush(void); +void killchildren(char*); +void *Malloc(int); +char *maketmp(int*); +int match(char*, char*, char*, Shell*); +char *membername(char*, int, char*); +void mk(char*); +unsigned long mkmtime(char*); +long mtime(char*); +Arc *newarc(Node*, Rule*, char*, Resub*); +Bufblock *newbuf(void); +Job *newjob(Rule*, Node*, char*, char**, Word*, Word*, Word*, Word*); +Word *newword(char*); +int nextrune(Biobuf*, int); +int nextslot(void); +void nproc(void); +void nrep(void); +int outofdate(Node*, Arc*, int); +void parse(char*, int, int); +int pipecmd(char*, Envy*, int*, Shell*, Word*); +void popshell(void); +void prusage(void); +void pushshell(void); +void rcopy(char**, Resub*, int); +void readenv(void); +void *Realloc(void*, int); +void rinsert(Bufblock *, Rune); +char *rulecnt(void); +void run(Job*); +char *setshell(Word*); +void setvar(char*, void*); +int shargv(Word*, int, char***); +char *shname(char*); +void shprint(char*, Envy*, Bufblock*, Shell*); +Word *stow(char*); +void subst(char*, char*, char*); +void symdel(char*, int); +void syminit(void); +Symtab *symlook(char*, int, void*); +void symstat(void); +void symtraverse(int, void(*)(Symtab*)); +void timeinit(char*); +long timeof(char*, int); +void touch(char*); +void update(int, Node*); +void usage(void); +Word *varsub(char**); +int waitfor(char*); +int waitup(int, int*); +Word *wdup(Word*); +int work(Node*, Node*, Arc*); +char *wtos(Word*, int); diff --git a/mk/mk/graph.c b/mk/mk/graph.c @@ -0,0 +1,279 @@ +#include "mk.h" + +static Node *applyrules(char *, char *); +static void togo(Node *); +static int vacuous(Node *); +static Node *newnode(char *); +static void 
trace(char *, Arc *); +static void cyclechk(Node *); +static void ambiguous(Node *); +static void attribute(Node *); + +Node * +graph(char *target) +{ + Node *node; + char *cnt; + + cnt = rulecnt(); + node = applyrules(target, cnt); + free(cnt); + cyclechk(node); + node->flags |= PROBABLE; /* make sure it doesn't get deleted */ + vacuous(node); + ambiguous(node); + attribute(node); + return(node); +} + +static Node * +applyrules(char *target, char *cnt) +{ + Symtab *sym; + Node *node; + Rule *r; + Arc head, *a = &head; + Word *w; + char stem[NAMEBLOCK], buf[NAMEBLOCK]; + Resub rmatch[NREGEXP]; + +/* print("applyrules(%lux='%s')\n", target, target); */ + sym = symlook(target, S_NODE, 0); + if(sym) + return sym->u.ptr; + target = strdup(target); + node = newnode(target); + head.n = 0; + head.next = 0; + sym = symlook(target, S_TARGET, 0); + memset((char*)rmatch, 0, sizeof(rmatch)); + for(r = sym? sym->u.ptr:0; r; r = r->chain){ + if(r->attr&META) continue; + if(strcmp(target, r->target)) continue; + if((!r->recipe || !*r->recipe) && (!r->tail || !r->tail->s || !*r->tail->s)) continue; /* no effect; ignore */ + if(cnt[r->rule] >= nreps) continue; + cnt[r->rule]++; + node->flags |= PROBABLE; + +/* if(r->attr&VIR) + * node->flags |= VIRTUAL; + * if(r->attr&NOREC) + * node->flags |= NORECIPE; + * if(r->attr&DEL) + * node->flags |= DELETE; + */ + if(!r->tail || !r->tail->s || !*r->tail->s) { + a->next = newarc((Node *)0, r, "", rmatch); + a = a->next; + } else + for(w = r->tail; w; w = w->next){ + a->next = newarc(applyrules(w->s, cnt), r, "", rmatch); + a = a->next; + } + cnt[r->rule]--; + head.n = node; + } + for(r = metarules; r; r = r->next){ + if((!r->recipe || !*r->recipe) && (!r->tail || !r->tail->s || !*r->tail->s)) continue; /* no effect; ignore */ + if ((r->attr&NOVIRT) && a != &head && (a->r->attr&VIR)) + continue; + if(r->attr&REGEXP){ + stem[0] = 0; + patrule = r; + memset((char*)rmatch, 0, sizeof(rmatch)); + if(regexec(r->pat, node->name, rmatch, NREGEXP) == 
0) + continue; + } else { + if(!match(node->name, r->target, stem, r->shellt)) continue; + } + if(cnt[r->rule] >= nreps) continue; + cnt[r->rule]++; + +/* if(r->attr&VIR) + * node->flags |= VIRTUAL; + * if(r->attr&NOREC) + * node->flags |= NORECIPE; + * if(r->attr&DEL) + * node->flags |= DELETE; + */ + + if(!r->tail || !r->tail->s || !*r->tail->s) { + a->next = newarc((Node *)0, r, stem, rmatch); + a = a->next; + } else + for(w = r->tail; w; w = w->next){ + if(r->attr&REGEXP) + regsub(w->s, buf, sizeof buf, rmatch, NREGEXP); + else + subst(stem, w->s, buf); + a->next = newarc(applyrules(buf, cnt), r, stem, rmatch); + a = a->next; + } + cnt[r->rule]--; + } + a->next = node->prereqs; + node->prereqs = head.next; + return(node); +} + +static void +togo(Node *node) +{ + Arc *la, *a; + + /* delete them now */ + la = 0; + for(a = node->prereqs; a; la = a, a = a->next) + if(a->flag&TOGO){ + if(a == node->prereqs) + node->prereqs = a->next; + else + la->next = a->next, a = la; + } +} + +static int +vacuous(Node *node) +{ + Arc *la, *a; + int vac = !(node->flags&PROBABLE); + + if(node->flags&READY) + return(node->flags&VACUOUS); + node->flags |= READY; + for(a = node->prereqs; a; a = a->next) + if(a->n && vacuous(a->n) && (a->r->attr&META)) + a->flag |= TOGO; + else + vac = 0; + /* if a rule generated arcs that DON'T go; no others from that rule go */ + for(a = node->prereqs; a; a = a->next) + if((a->flag&TOGO) == 0) + for(la = node->prereqs; la; la = la->next) + if((la->flag&TOGO) && (la->r == a->r)){ + la->flag &= ~TOGO; + } + togo(node); + if(vac) + node->flags |= VACUOUS; + return(vac); +} + +static Node * +newnode(char *name) +{ + register Node *node; + + node = (Node *)Malloc(sizeof(Node)); + symlook(name, S_NODE, (void *)node); + node->name = name; + node->time = timeof(name, 0); + node->prereqs = 0; + node->flags = node->time? 
PROBABLE : 0; + node->next = 0; + return(node); +} + +void +dumpn(char *s, Node *n) +{ + char buf[1024]; + Arc *a; + + snprint(buf, sizeof buf, "%s ", (*s == ' ')? s:""); + Bprint(&bout, "%s%s@%ld: time=%ld flags=0x%x next=%ld\n", + s, n->name, n, n->time, n->flags, n->next); + for(a = n->prereqs; a; a = a->next) + dumpa(buf, a); +} + +static void +trace(char *s, Arc *a) +{ + fprint(2, "\t%s", s); + while(a){ + fprint(2, " <-(%s:%d)- %s", a->r->file, a->r->line, + a->n? a->n->name:""); + if(a->n){ + for(a = a->n->prereqs; a; a = a->next) + if(*a->r->recipe) break; + } else + a = 0; + } + fprint(2, "\n"); +} + +static void +cyclechk(Node *n) +{ + Arc *a; + + if((n->flags&CYCLE) && n->prereqs){ + fprint(2, "mk: cycle in graph detected at target %s\n", n->name); + Exit(); + } + n->flags |= CYCLE; + for(a = n->prereqs; a; a = a->next) + if(a->n) + cyclechk(a->n); + n->flags &= ~CYCLE; +} + +static void +ambiguous(Node *n) +{ + Arc *a; + Rule *r = 0; + Arc *la; + int bad = 0; + + la = 0; + for(a = n->prereqs; a; a = a->next){ + if(a->n) + ambiguous(a->n); + if(*a->r->recipe == 0) continue; + if(r == 0) + r = a->r, la = a; + else{ + if(r->recipe != a->r->recipe){ + if((r->attr&META) && !(a->r->attr&META)){ + la->flag |= TOGO; + r = a->r, la = a; + } else if(!(r->attr&META) && (a->r->attr&META)){ + a->flag |= TOGO; + continue; + } + } + if(r->recipe != a->r->recipe){ + if(bad == 0){ + fprint(2, "mk: ambiguous recipes for %s:\n", n->name); + bad = 1; + trace(n->name, la); + } + trace(n->name, a); + } + } + } + if(bad) + Exit(); + togo(n); +} + +static void +attribute(Node *n) +{ + register Arc *a; + + for(a = n->prereqs; a; a = a->next){ + if(a->r->attr&VIR) + n->flags |= VIRTUAL; + if(a->r->attr&NOREC) + n->flags |= NORECIPE; + if(a->r->attr&DEL) + n->flags |= DELETE; + if(a->n) + attribute(a->n); + } + if(n->flags&VIRTUAL) + n->time = 0; +} diff --git a/mk/mk/job.c b/mk/mk/job.c @@ -0,0 +1,33 @@ +#include "mk.h" + +Job * +newjob(Rule *r, Node *nlist, char *stem, char 
**match, Word *pre, Word *npre, Word *tar, Word *atar) +{ + register Job *j; + + j = (Job *)Malloc(sizeof(Job)); + j->r = r; + j->n = nlist; + j->stem = stem; + j->match = match; + j->p = pre; + j->np = npre; + j->t = tar; + j->at = atar; + j->nproc = -1; + j->next = 0; + return(j); +} + +void +dumpj(char *s, Job *j, int all) +{ + Bprint(&bout, "%s\n", s); + while(j){ + Bprint(&bout, "job@%ld: r=%ld n=%ld stem='%s' nproc=%d\n", + j, j->r, j->n, j->stem, j->nproc); + Bprint(&bout, "\ttarget='%s' alltarget='%s' prereq='%s' nprereq='%s'\n", + wtos(j->t, ' '), wtos(j->at, ' '), wtos(j->p, ' '), wtos(j->np, ' ')); + j = all? j->next : 0; + } +} diff --git a/mk/mk/lex.c b/mk/mk/lex.c @@ -0,0 +1,146 @@ +#include "mk.h" + +static int bquote(Biobuf*, Bufblock*); + +/* + * Assemble a line skipping blank lines, comments, and eliding + * escaped newlines + */ +int +assline(Biobuf *bp, Bufblock *buf) +{ + int c; + int lastc; + + buf->current=buf->start; + while ((c = nextrune(bp, 1)) >= 0){ + switch(c) + { + case '\r': /* consumes CRs for Win95 */ + continue; + case '\n': + if (buf->current != buf->start) { + insert(buf, 0); + return 1; + } + break; /* skip empty lines */ + case '\\': + case '\'': + case '"': + rinsert(buf, c); + if (shellt->escapetoken(bp, buf, 1, c) == 0) + Exit(); + break; + case '`': + if (bquote(bp, buf) == 0) + Exit(); + break; + case '#': + lastc = '#'; + while ((c = Bgetc(bp)) != '\n') { + if (c < 0) + goto eof; + if(c != '\r') + lastc = c; + } + mkinline++; + if (lastc == '\\') + break; /* propagate escaped newlines??*/ + if (buf->current != buf->start) { + insert(buf, 0); + return 1; + } + break; + default: + rinsert(buf, c); + break; + } + } +eof: + insert(buf, 0); + return *buf->start != 0; +} + +/* + * assemble a back-quoted shell command into a buffer + */ +static int +bquote(Biobuf *bp, Bufblock *buf) +{ + int c, line, term; + int start; + + line = mkinline; + while((c = Bgetrune(bp)) == ' ' || c == '\t') + ; + if(c == '{'){ + term = '}'; /* rc 
style */ + while((c = Bgetrune(bp)) == ' ' || c == '\t') + ; + } else + term = '`'; /* sh style */ + + start = buf->current-buf->start; + for(;c > 0; c = nextrune(bp, 0)){ + if(c == term){ + insert(buf, '\n'); + insert(buf,0); + buf->current = buf->start+start; + execinit(); + execsh(0, buf->current, buf, envy, shellt, shellcmd); + return 1; + } + if(c == '\n') + break; + if(c == '\'' || c == '"' || c == '\\'){ + insert(buf, c); + if(!shellt->escapetoken(bp, buf, 1, c)) + return 0; + continue; + } + rinsert(buf, c); + } + SYNERR(line); + fprint(2, "missing closing %c after `\n", term); + return 0; +} + +/* + * get next character stripping escaped newlines + * the flag specifies whether escaped newlines are to be elided or + * replaced with a blank. + */ +int +nextrune(Biobuf *bp, int elide) +{ + int c, c2; + static int savec; + + if(savec){ + c = savec; + savec = 0; + return c; + } + + for (;;) { + c = Bgetrune(bp); + if (c == '\\') { + c2 = Bgetrune(bp); + if(c2 == '\r'){ + savec = c2; + c2 = Bgetrune(bp); + } + if (c2 == '\n') { + savec = 0; + mkinline++; + if (elide) + continue; + return ' '; + } + Bungetrune(bp); + } + if (c == '\n') + mkinline++; + return c; + } +} diff --git a/mk/mk/main.c b/mk/mk/main.c @@ -0,0 +1,287 @@ +#include "mk.h" + +#define MKFILE "mkfile" + +int debug; +Rule *rules, *metarules; +int nflag = 0; +int tflag = 0; +int iflag = 0; +int kflag = 0; +int aflag = 0; +int uflag = 0; +char *explain = 0; +Word *target1; +int nreps = 1; +Job *jobs; +Biobuf bout; +Rule *patrule; +void badusage(void); +#ifdef PROF +short buf[10000]; +#endif + +int +main(int argc, char **argv) +{ + Word *w; + char *s, *temp; + char *files[256], **f = files, **ff; + int sflag = 0; + int i; + int tfd = -1; + Biobuf tb; + Bufblock *buf; + Bufblock *whatif; + + /* + * start with a copy of the current environment variables + * instead of sharing them + */ + + Binit(&bout, 1, OWRITE); + buf = newbuf(); + whatif = 0; + USED(argc); + for(argv++; *argv && (**argv == '-'); 
argv++) + { + bufcpy(buf, argv[0], strlen(argv[0])); + insert(buf, ' '); + switch(argv[0][1]) + { + case 'a': + aflag = 1; + break; + case 'd': + if(*(s = &argv[0][2])) + while(*s) switch(*s++) + { + case 'p': debug |= D_PARSE; break; + case 'g': debug |= D_GRAPH; break; + case 'e': debug |= D_EXEC; break; + } + else + debug = 0xFFFF; + break; + case 'e': + explain = &argv[0][2]; + break; + case 'f': + if(*++argv == 0) + badusage(); + *f++ = *argv; + bufcpy(buf, argv[0], strlen(argv[0])); + insert(buf, ' '); + break; + case 'i': + iflag = 1; + break; + case 'k': + kflag = 1; + break; + case 'n': + nflag = 1; + break; + case 's': + sflag = 1; + break; + case 't': + tflag = 1; + break; + case 'u': + uflag = 1; + break; + case 'w': + if(whatif == 0) + whatif = newbuf(); + else + insert(whatif, ' '); + if(argv[0][2]) + bufcpy(whatif, &argv[0][2], strlen(&argv[0][2])); + else { + if(*++argv == 0) + badusage(); + bufcpy(whatif, &argv[0][0], strlen(&argv[0][0])); + } + break; + default: + badusage(); + } + } +#ifdef PROF + { + extern etext(); + monitor(main, etext, buf, sizeof buf, 300); + } +#endif + + if(aflag) + iflag = 1; + usage(); + syminit(); + initshell(); + initenv(); + usage(); + + /* + assignment args become null strings + */ + temp = 0; + for(i = 0; argv[i]; i++) if(utfrune(argv[i], '=')){ + bufcpy(buf, argv[i], strlen(argv[i])); + insert(buf, ' '); + if(tfd < 0){ + temp = maketmp(&tfd); + if(temp == 0) { + fprint(2, "temp file: %r\n"); + Exit(); + } + Binit(&tb, tfd, OWRITE); + } + Bprint(&tb, "%s\n", argv[i]); + *argv[i] = 0; + } + if(tfd >= 0){ + Bflush(&tb); + LSEEK(tfd, 0L, 0); + parse("command line args", tfd, 1); + remove(temp); + } + + if (buf->current != buf->start) { + buf->current--; + insert(buf, 0); + } + symlook("MKFLAGS", S_VAR, (void *) stow(buf->start)); + buf->current = buf->start; + for(i = 0; argv[i]; i++){ + if(*argv[i] == 0) continue; + if(i) + insert(buf, ' '); + bufcpy(buf, argv[i], strlen(argv[i])); + } + insert(buf, 0); + 
symlook("MKARGS", S_VAR, (void *) stow(buf->start)); + freebuf(buf); + + if(f == files){ + if(access(MKFILE, 4) == 0) + parse(MKFILE, open(MKFILE, 0), 0); + } else + for(ff = files; ff < f; ff++) + parse(*ff, open(*ff, 0), 0); + if(DEBUG(D_PARSE)){ + dumpw("default targets", target1); + dumpr("rules", rules); + dumpr("metarules", metarules); + dumpv("variables"); + } + if(whatif){ + insert(whatif, 0); + timeinit(whatif->start); + freebuf(whatif); + } + execinit(); + /* skip assignment args */ + while(*argv && (**argv == 0)) + argv++; + + catchnotes(); + if(*argv == 0){ + if(target1) + for(w = target1; w; w = w->next) + mk(w->s); + else { + fprint(2, "mk: nothing to mk\n"); + Exit(); + } + } else { + if(sflag){ + for(; *argv; argv++) + if(**argv) + mk(*argv); + } else { + Word *head, *tail, *t; + + /* fake a new rule with all the args as prereqs */ + tail = 0; + t = 0; + for(; *argv; argv++) + if(**argv){ + if(tail == 0) + tail = t = newword(*argv); + else { + t->next = newword(*argv); + t = t->next; + } + } + if(tail->next == 0) + mk(tail->s); + else { + head = newword("command line arguments"); + addrules(head, tail, strdup(""), VIR, mkinline, 0); + mk(head->s); + } + } + } + if(uflag) + prusage(); + exits(0); + return 0; +} + +void +badusage(void) +{ + + fprint(2, "Usage: mk [-f file] [-n] [-a] [-e] [-t] [-k] [-i] [-d[egp]] [targets ...]\n"); + Exit(); +} + +void * +Malloc(int n) +{ + register void *s; + + s = malloc(n); + if(!s) { + fprint(2, "mk: cannot alloc %d bytes\n", n); + Exit(); + } + return(s); +} + +void * +Realloc(void *s, int n) +{ + if(s) + s = realloc(s, n); + else + s = malloc(n); + if(!s) { + fprint(2, "mk: cannot alloc %d bytes\n", n); + Exit(); + } + return(s); +} + +void +assert(char *s, int n) +{ + if(!n){ + fprint(2, "mk: Assertion ``%s'' failed.\n", s); + Exit(); + } +} + +void +regerror(char *s) +{ + if(patrule) + fprint(2, "mk: %s:%d: regular expression error; %s\n", + patrule->file, patrule->line, s); + else + fprint(2, "mk: %s:%d: 
regular expression error; %s\n", + infile, mkinline, s); + Exit(); +} diff --git a/mk/mk/match.c b/mk/mk/match.c @@ -0,0 +1,49 @@ +#include "mk.h" + +int +match(char *name, char *template, char *stem, Shell *sh) +{ + Rune r; + int n; + + while(*name && *template){ + n = chartorune(&r, template); + if (PERCENT(r)) + break; + while (n--) + if(*name++ != *template++) + return 0; + } + if(!PERCENT(*template)) + return 0; + n = strlen(name)-strlen(template+1); + if (n < 0) + return 0; + if (strcmp(template+1, name+n)) + return 0; + strncpy(stem, name, n); + stem[n] = 0; + if(*template == '&') + return !sh->charin(stem, "./"); + return 1; +} + +void +subst(char *stem, char *template, char *dest) +{ + Rune r; + char *s; + int n; + + while(*template){ + n = chartorune(&r, template); + if (PERCENT(r)) { + template += n; + for (s = stem; *s; s++) + *dest++ = *s; + } else + while (n--) + *dest++ = *template++; + } + *dest = 0; +} diff --git a/mk/mk/mk.1 b/mk/mk/mk.1 @@ -0,0 +1,693 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH MK 1 +.SH NAME +mk \- maintain (make) related files +.SH SYNOPSIS +.B mk +[ +.B -f +.I mkfile +] ... +[ +.I option ... +] +[ +.I target ... +] +.SH DESCRIPTION +.I Mk +uses the dependency rules specified in +.I mkfile +to control the update (usually by compilation) of +.I targets +(usually files) +from the source files upon which they depend. +The +.I mkfile +(default +.LR mkfile ) +contains a +.I rule +for each target that identifies the files and other +targets upon which it depends and an +.IR sh (1) +script, a +.IR recipe , +to update the target. +The script is run if the target does not exist +or if it is older than any of the files it depends on. +.I Mkfile +may also contain +.I meta-rules +that define actions for updating implicit targets. +If no +.I target +is specified, the target of the first rule (not meta-rule) in +.I mkfile +is updated. 
+.PP +The environment variable +.B $NPROC +determines how many targets may be updated simultaneously. +Some operating systems, e.g., Plan 9, set +.B $NPROC +automatically to the number of CPUs on the current machine. +.PP +Options are: +.TP \w'\fL-d[egp]\ 'u +.B -a +Assume all targets to be out of date. +Thus, everything is updated. +.PD 0 +.TP +.BR -d [ egp ] +Produce debugging output +.RB ( p +is for parsing, +.B g +for graph building, +.B e +for execution). +.TP +.B -e +Explain why each target is made. +.TP +.B -i +Force any missing intermediate targets to be made. +.TP +.B -k +Do as much work as possible in the face of errors. +.TP +.B -n +Print, but do not execute, the commands +needed to update the targets. +.TP +.B -s +Make the command line arguments sequentially rather than in parallel. +.TP +.B -t +Touch (update the modified date of) file targets, without +executing any recipes. +.TP +.BI -w target1 , target2,... +Pretend the modify time for each +.I target +is the current time; useful in conjunction with +.B -n +to learn what updates would be triggered by +modifying the +.IR targets . +.PD +.SS The \fLmkfile\fP +A +.I mkfile +consists of +.I assignments +(described under `Environment') and +.IR rules . +A rule contains +.I targets +and a +.IR tail . +A target is a literal string +and is normally a file name. +The tail contains zero or more +.I prerequisites +and an optional +.IR recipe , +which is a +.B shell +script. +Each line of the recipe must begin with white space. +A rule takes the form +.IP +.EX +target: prereq1 prereq2 + \f2recipe using\fP prereq1, prereq2 \f2to build\fP target +.EE +.PP +When the recipe is executed, +the first character on every line is elided. +.PP +After the colon on the target line, a rule may specify +.IR attributes , +described below. +.PP +A +.I meta-rule +has a target of the form +.IB A % B +where +.I A +and +.I B +are (possibly empty) strings. 
+A meta-rule acts as a rule for any potential target whose +name matches +.IB A % B +with +.B % +replaced by an arbitrary string, called the +.IR stem . +In interpreting a meta-rule, +the stem is substituted for all occurrences of +.B % +in the prerequisite names. +In the recipe of a meta-rule, the environment variable +.B $stem +contains the string matched by the +.BR % . +For example, a meta-rule to compile a C program +might be: +.IP +.EX +%: %.c + cc -c $stem.c + ld -o $stem $stem.o +.EE +.PP +Meta-rules may contain an ampersand +.B & +rather than a percent sign +.BR % . +A +.B % +matches a maximal length string of any characters; +an +.B & +matches a maximal length string of any characters except period +or slash. +.PP +The text of the +.I mkfile +is processed as follows. +Lines beginning with +.B < +followed by a file name are replaced by the contents of the named +file. +Lines beginning with +.B "<|" +followed by a file name are replaced by the output +of the execution of the named +file. +Blank lines and comments, which run from unquoted +.B # +characters to the following newline, are deleted. +The character sequence backslash-newline is deleted, +so long lines in +.I mkfile +may be folded. +Non-recipe lines are processed by substituting for +.BI `{ command } +the output of the +.I command +when run by +.IR sh . +References to variables are replaced by the variables' values. +Special characters may be quoted using single quotes +.BR \&'' +as in +.IR sh (1). +.PP +Assignments and rules are distinguished by +the first unquoted occurrence of +.B : +(rule) +or +.B = +(assignment). +.PP +A later rule may modify or override an existing rule under the +following conditions: +.TP +\- +If the targets of the rules exactly match and one rule +contains only a prerequisite clause and no recipe, the +clause is added to the prerequisites of the other rule. +If either or both targets are virtual, the recipe is +always executed. 
+.TP +\- +If the targets of the rules match exactly and the +prerequisites do not match and both rules +contain recipes, +.I mk +reports an ``ambiguous recipe'' error. +.TP +\- +If the target and prerequisites of both rules match exactly, +the second rule overrides the first. +.SS Environment +Rules may make use of +shell +environment variables. +A legal reference of the form +.B $OBJ +or +.B ${name} +is expanded as in +.IR sh (1). +A reference of the form +.BI ${name: A % B = C\fL%\fID\fL}\fR, +where +.I A, B, C, D +are (possibly empty) strings, +has the value formed by expanding +.B $name +and substituting +.I C +for +.I A +and +.I D +for +.I B +in each word in +.B $name +that matches pattern +.IB A % B\f1. +.PP +Variables can be set by +assignments of the form +.I + var\fL=\fR[\fIattr\fL=\fR]\fIvalue\fR +.br +Blanks in the +.I value +break it into words. +Such variables are exported +to the environment of +recipes as they are executed, unless +.BR U , +the only legal attribute +.IR attr , +is present. +The initial value of a variable is +taken from (in increasing order of precedence) +the default values below, +.I mk's +environment, the +.IR mkfiles , +and any command line assignment as an argument to +.IR mk . +A variable assignment argument overrides the first (but not any subsequent) +assignment to that variable. +.PP +The variable +.B MKFLAGS +contains all the option arguments (arguments starting with +.L - +or containing +.LR = ) +and +.B MKARGS +contains all the targets in the call to +.IR mk . +.PP +The variable +.B MKSHELL +contains the shell command line +.I mk +uses to run recipes. +If the first word of the command ends in +.B rc +or +.BR rcsh , +.I mk +uses +.IR rc (1)'s +quoting rules; otherwise it uses +.IR sh (1)'s. 
+The +.B MKSHELL +variable is consulted when the mkfile is read, not when it is executed, +so that different shells can be used within a single mkfile: +.IP +.EX +MKSHELL=$PLAN9/bin/rc +use-rc:V: + for(i in a b c) echo $i + +MKSHELL=sh +use-sh:V: + for i in a b c; do echo $i; done +.EE +.LP +Mkfiles included via +.B < +or +.B <| +.RI ( q.v. ) +see their own private copy of +.BR MKSHELL , +which always starts set to +.B sh . +.PP +Dynamic information may be included in the mkfile by using a line of the form +.IP +\fR<|\fIcommand\fR \fIargs\fR +.LP +This runs the command +.I command +with the given arguments +.I args +and pipes its standard output to +.I mk +to be included as part of the mkfile. For instance, the Inferno kernels +use this technique +to run a shell command with an awk script and a configuration +file as arguments in order for +the +.I awk +script to process the file and output a set of variables and their values. +.SS Execution +.PP +During execution, +.I mk +determines which targets must be updated, and in what order, +to build the +.I names +specified on the command line. +It then runs the associated recipes. +.PP +A target is considered up to date if it has no prerequisites or +if all its prerequisites are up to date and it is newer +than all its prerequisites. +Once the recipe for a target has executed, the target is +considered up to date. +.PP +The date stamp +used to determine if a target is up to date is computed +differently for different types of targets. +If a target is +.I virtual +(the target of a rule with the +.B V +attribute), +its date stamp is initially zero; when the target is +updated the date stamp is set to +the most recent date stamp of its prerequisites. +Otherwise, if a target does not exist as a file, +its date stamp is set to the most recent date stamp of its prerequisites, +or zero if it has no prerequisites. +Otherwise, the target is the name of a file and +the target's date stamp is always that file's modification date. 
+The date stamp is computed when the target is needed in +the execution of a rule; it is not a static value. +.PP +Nonexistent targets that have prerequisites +and are themselves prerequisites are treated specially. +Such a target +.I t +is given the date stamp of its most recent prerequisite +and if this causes all the targets which have +.I t +as a prerequisite to be up to date, +.I t +is considered up to date. +Otherwise, +.I t +is made in the normal fashion. +The +.B -i +flag overrides this special treatment. +.PP +Files may be made in any order that respects +the preceding restrictions. +.PP +A recipe is executed by supplying the recipe as standard input to +the command +.BR /bin/sh . +(Note that unlike +.IR make , +.I mk +feeds the entire recipe to the shell rather than running each line +of the recipe separately.) +The environment is augmented by the following variables: +.TP 14 +.B $alltarget +all the targets of this rule. +.TP +.B $newprereq +the prerequisites that caused this rule to execute. +.TP +.B $newmember +the prerequisites that are members of an aggregate +that caused this rule to execute. +When the prerequisites of a rule are members of an +aggregate, +.B $newprereq +contains the name of the aggregate and out of date +members, while +.B $newmember +contains only the name of the members. +.TP +.B $nproc +the process slot for this recipe. +It satisfies +.RB 0≤ $nproc < $NPROC . +.TP +.B $pid +the process id for the +.I mk +executing the recipe. +.TP +.B $prereq +all the prerequisites for this rule. +.TP +.B $stem +if this is a meta-rule, +.B $stem +is the string that matched +.B % +or +.BR & . +Otherwise, it is empty. +For regular expression meta-rules (see below), the variables +.LR stem0 ", ...," +.L stem9 +are set to the corresponding subexpressions. +.TP +.B $target +the targets for this rule that need to be remade. +.PP +These variables are available only during the execution of a recipe, +not while evaluating the +.IR mkfile . 
+.PP +Unless the rule has the +.B Q +attribute, +the recipe is printed prior to execution +with recognizable environment variables expanded. +Commands returning error status +cause +.I mk +to terminate. +.PP +Recipes and backquoted +.B rc +commands in places such as assignments +execute in a copy of +.I mk's +environment; changes they make to +environment variables are not visible from +.IR mk . +.PP +Variable substitution in a rule is done when +the rule is read; variable substitution in the recipe is done +when the recipe is executed. For example: +.IP +.EX +bar=a.c +foo: $bar + $CC -o foo $bar +bar=b.c +.EE +.PP +will compile +.B b.c +into +.BR foo , +if +.B a.c +is newer than +.BR foo . +.SS Aggregates +Names of the form +.IR a ( b ) +refer to member +.I b +of the aggregate +.IR a . +.SS Attributes +The colon separating the target from the prerequisites +may be +immediately followed by +.I attributes +and another colon. +The attributes are: +.TP +.B D +If the recipe exits with a non-null status, the target is deleted. +.TP +.B E +Continue execution if the recipe draws errors. +.TP +.B N +If there is no recipe, the target has its time updated. +.TP +.B n +The rule is a meta-rule that cannot be a target of a virtual rule. +Only files match the pattern in the target. +.TP +.B P +The characters after the +.B P +until the terminating +.B : +are taken as a program name. +It will be invoked as +.B "sh -c prog 'arg1' 'arg2'" +and should return a zero exit status +if and only if arg1 is up to date with respect to arg2. +Date stamps are still propagated in the normal way. +.TP +.B Q +The recipe is not printed prior to execution. +.TP +.B R +The rule is a meta-rule using regular expressions. +In the rule, +.B % +has no special meaning. +The target is interpreted as a regular expression as defined in +.IR regexp9 (7). +The prerequisites may contain references +to subexpressions in the form +.BI \e n\f1, +as in the substitute command of +.IR sed (1). 
+.TP +.B U +The targets are considered to have been updated +even if the recipe did not do so. +.TP +.B V +The targets of this rule are marked as virtual. +They are distinct from files of the same name. +.PD +.SH EXAMPLES +A simple mkfile to compile a program: +.IP +.EX +.ta 8n +8n +8n +8n +8n +8n +8n +</$objtype/mkfile + +prog: a.$O b.$O c.$O + $LD $LDFLAGS -o $target $prereq + +%.$O: %.c + $CC $CFLAGS $stem.c +.EE +.PP +Override flag settings in the mkfile: +.IP +.EX +% mk target 'CFLAGS=-S -w' +.EE +.PP +Maintain a library: +.IP +.EX +libc.a(%.$O):N: %.$O +libc.a: libc.a(abs.$O) libc.a(access.$O) libc.a(alarm.$O) ... + ar r libc.a $newmember +.EE +.PP +String expression variables to derive names from a master list: +.IP +.EX +NAMES=alloc arc bquote builtins expand main match mk var word +OBJ=${NAMES:%=%.$O} +.EE +.PP +Regular expression meta-rules: +.IP +.EX +([^/]*)/(.*)\e.$O:R: \e1/\e2.c + cd $stem1; $CC $CFLAGS $stem2.c +.EE +.PP +A correct way to deal with +.IR yacc (1) +grammars. +The file +.B lex.c +includes the file +.B x.tab.h +rather than +.B y.tab.h +in order to reflect changes in content, not just modification time. +.IP +.EX +lex.$O: x.tab.h +x.tab.h: y.tab.h + cmp -s x.tab.h y.tab.h || cp y.tab.h x.tab.h +y.tab.c y.tab.h: gram.y + $YACC -d gram.y +.EE +.PP +The above example could also use the +.B P +attribute for the +.B x.tab.h +rule: +.IP +.EX +x.tab.h:Pcmp -s: y.tab.h + cp y.tab.h x.tab.h +.EE +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH SEE ALSO +.IR sh (1), +.IR regexp9 (7) +.PP +A. Hume, +``Mk: a Successor to Make'' +(Tenth Edition Research Unix Manuals). +.PP +Andrew G. Hume and Bob Flandrena, +``Maintaining Files on Plan 9 with Mk''. +.BR DOCPREFIX/doc/mk.pdf . +.SH HISTORY +Andrew Hume wrote +.I mk +for Tenth Edition Research Unix. +It was later ported to Plan 9. +This software is a port of the Plan 9 version back to Unix. +.SH BUGS +Identical recipes for regular expression meta-rules only have one target. 
+.PP +Seemingly appropriate input like +.B CFLAGS=-DHZ=60 +is parsed as an erroneous attribute; correct it by inserting +a space after the first +.LR = . +.PP +The recipes printed by +.I mk +before being passed to +the shell +for execution are sometimes erroneously expanded +for printing. Don't trust what's printed; rely +on what the shell +does. diff --git a/mk/mk/mk.c b/mk/mk/mk.c @@ -0,0 +1,234 @@ +#include "mk.h" + +int runerrs; + +void +mk(char *target) +{ + Node *node; + int did = 0; + + nproc(); /* it can be updated dynamically */ + nrep(); /* it can be updated dynamically */ + runerrs = 0; + node = graph(target); + if(DEBUG(D_GRAPH)){ + dumpn("new target\n", node); + Bflush(&bout); + } + clrmade(node); + while(node->flags&NOTMADE){ + if(work(node, (Node *)0, (Arc *)0)) + did = 1; /* found something to do */ + else { + if(waitup(1, (int *)0) > 0){ + if(node->flags&(NOTMADE|BEINGMADE)){ + assert("must be run errors", runerrs); + break; /* nothing more waiting */ + } + } + } + } + if(node->flags&BEINGMADE) + waitup(-1, (int *)0); + while(jobs) + waitup(-2, (int *)0); + assert("target didn't get done", runerrs || (node->flags&MADE)); + if(did == 0) + Bprint(&bout, "mk: '%s' is up to date\n", node->name); +} + +void +clrmade(Node *n) +{ + Arc *a; + + n->flags &= ~(CANPRETEND|PRETENDING); + if(strchr(n->name, '(') ==0 || n->time) + n->flags |= CANPRETEND; + MADESET(n, NOTMADE); + for(a = n->prereqs; a; a = a->next) + if(a->n) + clrmade(a->n); +} + +static void +unpretend(Node *n) +{ + MADESET(n, NOTMADE); + n->flags &= ~(CANPRETEND|PRETENDING); + n->time = 0; +} + +static char* +dir(void) +{ + static char buf[1024]; + + return getcwd(buf, sizeof buf); +} + +int +work(Node *node, Node *p, Arc *parc) +{ + Arc *a, *ra; + int weoutofdate; + int ready; + int did = 0; + + /*print("work(%s) flags=0x%x time=%ld\n", node->name, node->flags, node->time); */ + if(node->flags&BEINGMADE) + return(did); + if((node->flags&MADE) && (node->flags&PRETENDING) && p && outofdate(p, 
parc, 0)){ + if(explain) + fprint(1, "unpretending %s(%ld) because %s is out of date(%ld)\n", + node->name, node->time, p->name, p->time); + unpretend(node); + } + /* + have a look if we are pretending in case + someone has been unpretended out from underneath us + */ + if(node->flags&MADE){ + if(node->flags&PRETENDING){ + node->time = 0; + }else + return(did); + } + /* consider no prerequsite case */ + if(node->prereqs == 0){ + if(node->time == 0){ + fprint(2, "mk: don't know how to make '%s' in %s\n", node->name, dir()); + if(kflag){ + node->flags |= BEINGMADE; + runerrs++; + } else + Exit(); + } else + MADESET(node, MADE); + return(did); + } + /* + now see if we are out of date or what + */ + ready = 1; + weoutofdate = aflag; + ra = 0; + for(a = node->prereqs; a; a = a->next) + if(a->n){ + did = work(a->n, node, a) || did; + if(a->n->flags&(NOTMADE|BEINGMADE)) + ready = 0; + if(outofdate(node, a, 0)){ + weoutofdate = 1; + if((ra == 0) || (ra->n == 0) + || (ra->n->time < a->n->time)) + ra = a; + } + } else { + if(node->time == 0){ + if(ra == 0) + ra = a; + weoutofdate = 1; + } + } + if(ready == 0) /* can't do anything now */ + return(did); + if(weoutofdate == 0){ + MADESET(node, MADE); + return(did); + } + /* + can we pretend to be made? + */ + if((iflag == 0) && (node->time == 0) && (node->flags&(PRETENDING|CANPRETEND)) + && p && ra->n && !outofdate(p, ra, 0)){ + node->flags &= ~CANPRETEND; + MADESET(node, MADE); + if(explain && ((node->flags&PRETENDING) == 0)) + fprint(1, "pretending %s has time %ld\n", node->name, node->time); + node->flags |= PRETENDING; + return(did); + } + /* + node is out of date and we REALLY do have to do something. + quickly rescan for pretenders + */ + for(a = node->prereqs; a; a = a->next) + if(a->n && (a->n->flags&PRETENDING)){ + if(explain) + Bprint(&bout, "unpretending %s because of %s because of %s\n", + a->n->name, node->name, ra->n? 
ra->n->name : "rule with no prerequisites"); + + unpretend(a->n); + did = work(a->n, node, a) || did; + ready = 0; + } + if(ready == 0) /* try later unless nothing has happened for -k's sake */ + return(did || work(node, p, parc)); + did = dorecipe(node) || did; + return(did); +} + +void +update(int fake, Node *node) +{ + Arc *a; + + MADESET(node, fake? BEINGMADE : MADE); + if(((node->flags&VIRTUAL) == 0) && (access(node->name, 0) == 0)){ + node->time = timeof(node->name, 1); + node->flags &= ~(CANPRETEND|PRETENDING); + for(a = node->prereqs; a; a = a->next) + if(a->prog) + outofdate(node, a, 1); + } else { + node->time = 1; + for(a = node->prereqs; a; a = a->next) + if(a->n && outofdate(node, a, 1)) + node->time = a->n->time; + } +/* print("----node %s time=%ld flags=0x%x\n", node->name, node->time, node->flags);*/ +} + +static int +pcmp(char *prog, char *p, char *q, Shell *sh, Word *shcmd) +{ + char buf[3*NAMEBLOCK]; + int pid; + + Bflush(&bout); + snprint(buf, sizeof buf, "%s '%s' '%s'\n", prog, p, q); + pid = pipecmd(buf, 0, 0, sh, shcmd); + while(waitup(-3, &pid) >= 0) + ; + return(pid? 
2:1); +} + +int +outofdate(Node *node, Arc *arc, int eval) +{ + char buf[3*NAMEBLOCK], *str; + Symtab *sym; + int ret; + + str = 0; + if(arc->prog){ + snprint(buf, sizeof buf, "%s%c%s", node->name, 0377, arc->n->name); + sym = symlook(buf, S_OUTOFDATE, 0); + if(sym == 0 || eval){ + if(sym == 0) + str = strdup(buf); + ret = pcmp(arc->prog, node->name, arc->n->name, arc->r->shellt, arc->r->shellcmd); + if(sym) + sym->u.value = ret; + else + symlook(str, S_OUTOFDATE, (void *)(uintptr)ret); + } else + ret = sym->u.value; + return(ret-1); + } else if(strchr(arc->n->name, '(') && arc->n->time == 0) /* missing archive member */ + return 1; + else + return node->time <= arc->n->time; +} diff --git a/mk/mk/mk.h b/mk/mk/mk.h @@ -0,0 +1,185 @@ +#include "sys.h" + +#undef assert +#define assert mkassert +extern Biobuf bout; + +typedef struct Bufblock +{ + struct Bufblock *next; + char *start; + char *end; + char *current; +} Bufblock; + +typedef struct Word +{ + char *s; + struct Word *next; +} Word; + +typedef struct Envy +{ + char *name; + Word *values; +} Envy; + +extern Envy *envy; + +typedef struct Shell +{ + char *name; + char *termchars; /* used in parse.c to isolate assignment attribute */ + int iws; /* inter-word separator in environment */ + char *(*charin)(char*, char*); /* search for unescaped characters */ + char *(*expandquote)(char*, Rune, Bufblock*); /* extract escaped token */ + int (*escapetoken)(Biobuf*, Bufblock*, int, int); /* input escaped token */ + char *(*copyq)(char*, Rune, Bufblock*); /* check for quoted strings */ + int (*matchname)(char*); /* does name match */ +} Shell; + +typedef struct Rule +{ + char *target; /* one target */ + Word *tail; /* constituents of targets */ + char *recipe; /* do it ! 
*/ + short attr; /* attributes */ + short line; /* source line */ + char *file; /* source file */ + Word *alltargets; /* all the targets */ + int rule; /* rule number */ + Reprog *pat; /* reg exp goo */ + char *prog; /* to use in out of date */ + struct Rule *chain; /* hashed per target */ + struct Rule *next; + Shell *shellt; /* shell to use with this rule */ + Word *shellcmd; +} Rule; + +extern Rule *rules, *metarules, *patrule; + +/* Rule.attr */ +#define META 0x0001 +#define UNUSED 0x0002 +#define UPD 0x0004 +#define QUIET 0x0008 +#define VIR 0x0010 +#define REGEXP 0x0020 +#define NOREC 0x0040 +#define DEL 0x0080 +#define NOVIRT 0x0100 + +#define NREGEXP 10 + +typedef struct Arc +{ + short flag; + struct Node *n; + Rule *r; + char *stem; + char *prog; + char *match[NREGEXP]; + struct Arc *next; +} Arc; + + /* Arc.flag */ +#define TOGO 1 + +typedef struct Node +{ + char *name; + long time; + unsigned short flags; + Arc *prereqs; + struct Node *next; /* list for a rule */ +} Node; + + /* Node.flags */ +#define VIRTUAL 0x0001 +#define CYCLE 0x0002 +#define READY 0x0004 +#define CANPRETEND 0x0008 +#define PRETENDING 0x0010 +#define NOTMADE 0x0020 +#define BEINGMADE 0x0040 +#define MADE 0x0080 +#define MADESET(n,m) n->flags = (n->flags&~(NOTMADE|BEINGMADE|MADE))|(m) +#define PROBABLE 0x0100 +#define VACUOUS 0x0200 +#define NORECIPE 0x0400 +#define DELETE 0x0800 +#define NOMINUSE 0x1000 + +typedef struct Job +{ + Rule *r; /* master rule for job */ + Node *n; /* list of node targets */ + char *stem; + char **match; + Word *p; /* prerequistes */ + Word *np; /* new prerequistes */ + Word *t; /* targets */ + Word *at; /* all targets */ + int nproc; /* slot number */ + struct Job *next; +} Job; +extern Job *jobs; + +typedef struct Symtab +{ + short space; + char *name; + union { + void *ptr; + uintptr value; + } u; + struct Symtab *next; +} Symtab; + +enum { + S_VAR, /* variable -> value */ + S_TARGET, /* target -> rule */ + S_TIME, /* file -> time */ + S_PID, /* pid -> 
products */ + S_NODE, /* target name -> node */ + S_AGG, /* aggregate -> time */ + S_BITCH, /* bitched about aggregate not there */ + S_NOEXPORT, /* var -> noexport */ + S_OVERRIDE, /* can't override */ + S_OUTOFDATE, /* n1\377n2 -> 2(outofdate) or 1(not outofdate) */ + S_MAKEFILE, /* target -> node */ + S_MAKEVAR, /* dumpable mk variable */ + S_EXPORTED, /* var -> current exported value */ + S_WESET, /* variable; we set in the mkfile */ + S_INTERNAL /* an internal mk variable (e.g., stem, target) */ +}; + +extern int debug; +extern int nflag, tflag, iflag, kflag, aflag, mflag; +extern int mkinline; +extern char *infile; +extern int nreps; +extern char *explain; +extern Shell *shellt; +extern Word *shellcmd; + +extern Shell shshell, rcshell; + +#define SYNERR(l) (fprint(2, "mk: %s:%d: syntax error; ", infile, ((l)>=0)?(l):mkinline)) +#define RERR(r) (fprint(2, "mk: %s:%d: rule error; ", (r)->file, (r)->line)) +#define NAMEBLOCK 1000 +#define BIGBLOCK 20000 + +#define SEP(c) (((c)==' ')||((c)=='\t')||((c)=='\n')) +#define WORDCHR(r) ((r) > ' ' && !utfrune("!\"#$%&'()*+,-./:;<=>?@[\\]^`{|}~", (r))) + +#define DEBUG(x) (debug&(x)) +#define D_PARSE 0x01 +#define D_GRAPH 0x02 +#define D_EXEC 0x04 + +#define LSEEK(f,o,p) seek(f,o,p) + +#define PERCENT(ch) (((ch) == '%') || ((ch) == '&')) + +#include "fns.h" diff --git a/mk/mk/mkfile b/mk/mk/mkfile @@ -0,0 +1,35 @@ +BIN = mk +OBJ = \ + arc.o\ + archive.o\ + bufblock.o\ + env.o\ + file.o\ + graph.o\ + job.o\ + lex.o\ + main.o\ + match.o\ + mk.o\ + parse.o\ + recipe.o\ + rc.o\ + rule.o\ + run.o\ + sh.o\ + shell.o\ + shprint.o\ + symtab.o\ + var.o\ + varsub.o\ + word.o\ + unix.o\ + +LOCAL_CFLAGS = -I"$PREFIX"/include +LOCAL_LDFLAGS = -L"$PREFIX"/lib +INSTALL_BIN = mk +INSTALL_MAN1 = mk.1 +DEPS = libbio libfmt libutf libregexp + +<$mkbuild/mk.default + diff --git a/mk/mk/parse.c b/mk/mk/parse.c @@ -0,0 +1,318 @@ +#include "mk.h" + +char *infile; +int mkinline; +static int rhead(char *, Word **, Word **, int *, char **); 
+static char *rbody(Biobuf*); +extern Word *target1; + +void +parse(char *f, int fd, int varoverride) +{ + int hline; + char *body; + Word *head, *tail; + int attr, set, pid; + char *prog, *p; + int newfd; + Biobuf in; + Bufblock *buf; + char *err; + + if(fd < 0){ + fprint(2, "open %s: %r\n", f); + Exit(); + } + pushshell(); + ipush(); + infile = strdup(f); + mkinline = 1; + Binit(&in, fd, OREAD); + buf = newbuf(); + while(assline(&in, buf)){ + hline = mkinline; + switch(rhead(buf->start, &head, &tail, &attr, &prog)) + { + case '<': + p = wtos(tail, ' '); + if(*p == 0){ + SYNERR(-1); + fprint(2, "missing include file name\n"); + Exit(); + } + newfd = open(p, OREAD); + if(newfd < 0){ + fprint(2, "warning: skipping missing include file %s: %r\n", p); + } else + parse(p, newfd, 0); + break; + case '|': + p = wtos(tail, ' '); + if(*p == 0){ + SYNERR(-1); + fprint(2, "missing include program name\n"); + Exit(); + } + execinit(); + pid=pipecmd(p, envy, &newfd, shellt, shellcmd); + if(newfd < 0){ + fprint(2, "warning: skipping missing program file %s: %r\n", p); + } else + parse(p, newfd, 0); + while(waitup(-3, &pid) >= 0) + ; + if(pid != 0){ + fprint(2, "bad include program status\n"); + Exit(); + } + break; + case ':': + body = rbody(&in); + addrules(head, tail, body, attr, hline, prog); + break; + case '=': + if(head->next){ + SYNERR(-1); + fprint(2, "multiple vars on left side of assignment\n"); + Exit(); + } + if(symlook(head->s, S_OVERRIDE, 0)){ + set = varoverride; + } else { + set = 1; + if(varoverride) + symlook(head->s, S_OVERRIDE, (void *)""); + } + if(set){ +/* +char *cp; +dumpw("tail", tail); +cp = wtos(tail, ' '); print("assign %s to %s\n", head->s, cp); free(cp); +*/ + setvar(head->s, (void *) tail); + symlook(head->s, S_WESET, (void *)""); + if(strcmp(head->s, "MKSHELL") == 0){ + if((err = setshell(tail)) != nil){ + SYNERR(hline); + fprint(2, "%s\n", err); + Exit(); + break; + } + } + } + if(attr) + symlook(head->s, S_NOEXPORT, (void *)""); + break; + 
default: + SYNERR(hline); + fprint(2, "expected one of :<=\n"); + Exit(); + break; + } + } + close(fd); + freebuf(buf); + ipop(); + popshell(); +} + +void +addrules(Word *head, Word *tail, char *body, int attr, int hline, char *prog) +{ + Word *w; + + assert("addrules args", head && body); + /* tuck away first non-meta rule as default target*/ + if(target1 == 0 && !(attr&REGEXP)){ + for(w = head; w; w = w->next) + if(shellt->charin(w->s, "%&")) + break; + if(w == 0) + target1 = wdup(head); + } + for(w = head; w; w = w->next) + addrule(w->s, tail, body, head, attr, hline, prog); +} + +static int +rhead(char *line, Word **h, Word **t, int *attr, char **prog) +{ + char *p; + char *pp; + int sep; + Rune r; + int n; + Word *w; + + p = shellt->charin(line,":=<"); + if(p == 0) + return('?'); + sep = *p; + *p++ = 0; + if(sep == '<' && *p == '|'){ + sep = '|'; + p++; + } + *attr = 0; + *prog = 0; + if(sep == '='){ + pp = shellt->charin(p, shellt->termchars); /* termchars is shell-dependent */ + if (pp && *pp == '=') { + while (p != pp) { + n = chartorune(&r, p); + switch(r) + { + default: + SYNERR(-1); + fprint(2, "unknown attribute '%c'\n",*p); + Exit(); + case 'U': + *attr = 1; + break; + } + p += n; + } + p++; /* skip trailing '=' */ + } + } + if((sep == ':') && *p && (*p != ' ') && (*p != '\t')){ + while (*p) { + n = chartorune(&r, p); + if (r == ':') + break; + p += n; + switch(r) + { + default: + SYNERR(-1); + fprint(2, "unknown attribute '%c'\n", p[-1]); + Exit(); + case 'D': + *attr |= DEL; + break; + case 'E': + *attr |= NOMINUSE; + break; + case 'n': + *attr |= NOVIRT; + break; + case 'N': + *attr |= NOREC; + break; + case 'P': + pp = utfrune(p, ':'); + if (pp == 0 || *pp == 0) + goto eos; + *pp = 0; + *prog = strdup(p); + *pp = ':'; + p = pp; + break; + case 'Q': + *attr |= QUIET; + break; + case 'R': + *attr |= REGEXP; + break; + case 'U': + *attr |= UPD; + break; + case 'V': + *attr |= VIR; + break; + } + } + if (*p++ != ':') { + eos: + SYNERR(-1); + fprint(2, 
"missing trailing :\n"); + Exit(); + } + } + *h = w = stow(line); + if(*w->s == 0 && sep != '<' && sep != '|' && sep != 'S') { + SYNERR(mkinline-1); + fprint(2, "no var on left side of assignment/rule\n"); + Exit(); + } + *t = stow(p); + return(sep); +} + +static char * +rbody(Biobuf *in) +{ + Bufblock *buf; + int r, lastr; + char *p; + + lastr = '\n'; + buf = newbuf(); + for(;;){ + r = Bgetrune(in); + if (r < 0) + break; + if (lastr == '\n') { + if (r == '#') + rinsert(buf, r); + else if (r != ' ' && r != '\t') { + Bungetrune(in); + break; + } + } else + rinsert(buf, r); + lastr = r; + if (r == '\n') + mkinline++; + } + insert(buf, 0); + p = strdup(buf->start); + freebuf(buf); + return p; +} + +struct input +{ + char *file; + int line; + struct input *next; +}; +static struct input *inputs = 0; + +void +ipush(void) +{ + struct input *in, *me; + + me = (struct input *)Malloc(sizeof(*me)); + me->file = infile; + me->line = mkinline; + me->next = 0; + if(inputs == 0) + inputs = me; + else { + for(in = inputs; in->next; ) + in = in->next; + in->next = me; + } +} + +void +ipop(void) +{ + struct input *in, *me; + + assert("pop input list", inputs != 0); + if(inputs->next == 0){ + me = inputs; + inputs = 0; + } else { + for(in = inputs; in->next->next; ) + in = in->next; + me = in->next; + in->next = 0; + } + infile = me->file; + mkinline = me->line; + free((char *)me); +} diff --git a/mk/mk/rc.c b/mk/mk/rc.c @@ -0,0 +1,194 @@ +#include "mk.h" + +/* + * This file contains functions that depend on rc's syntax. Most + * of the routines extract strings observing rc's escape conventions + */ + + +/* + * skip a token in single quotes. 
+ */ +static char * +squote(char *cp) +{ + Rune r; + int n; + + while(*cp){ + n = chartorune(&r, cp); + if(r == '\'') { + n += chartorune(&r, cp+n); + if(r != '\'') + return(cp); + } + cp += n; + } + SYNERR(-1); /* should never occur */ + fprint(2, "missing closing '\n"); + return 0; +} + +/* + * search a string for characters in a pattern set + * characters in quotes and variable generators are escaped + */ +char * +rccharin(char *cp, char *pat) +{ + Rune r; + int n, vargen; + + vargen = 0; + while(*cp){ + n = chartorune(&r, cp); + switch(r){ + case '\'': /* skip quoted string */ + cp = squote(cp+1); /* n must = 1 */ + if(!cp) + return 0; + break; + case '$': + if(*(cp+1) == '{') + vargen = 1; + break; + case '}': + if(vargen) + vargen = 0; + else if(utfrune(pat, r)) + return cp; + break; + default: + if(vargen == 0 && utfrune(pat, r)) + return cp; + break; + } + cp += n; + } + if(vargen){ + SYNERR(-1); + fprint(2, "missing closing } in pattern generator\n"); + } + return 0; +} + +/* + * extract an escaped token. Possible escape chars are single-quote, + * double-quote,and backslash. Only the first is valid for rc. the + * others are just inserted into the receiving buffer. + */ +char* +rcexpandquote(char *s, Rune r, Bufblock *b) +{ + if (r != '\'') { + rinsert(b, r); + return s; + } + + while(*s){ + s += chartorune(&r, s); + if(r == '\'') { + if(*s == '\'') + s++; + else + return s; + } + rinsert(b, r); + } + return 0; +} + +/* + * Input an escaped token. Possible escape chars are single-quote, + * double-quote and backslash. Only the first is a valid escape for + * rc; the others are just inserted into the receiving buffer. 
+ */ +int +rcescapetoken(Biobuf *bp, Bufblock *buf, int preserve, int esc) +{ + int c, line; + + if(esc != '\'') + return 1; + + line = mkinline; + while((c = nextrune(bp, 0)) > 0){ + if(c == '\''){ + if(preserve) + rinsert(buf, c); + c = Bgetrune(bp); + if (c < 0) + break; + if(c != '\''){ + Bungetrune(bp); + return 1; + } + } + rinsert(buf, c); + } + SYNERR(line); fprint(2, "missing closing %c\n", esc); + return 0; +} + +/* + * copy a single-quoted string; s points to char after opening quote + */ +static char * +copysingle(char *s, Bufblock *buf) +{ + Rune r; + + while(*s){ + s += chartorune(&r, s); + rinsert(buf, r); + if(r == '\'') + break; + } + return s; +} +/* + * check for quoted strings. backquotes are handled here; single quotes above. + * s points to char after opening quote, q. + */ +char * +rccopyq(char *s, Rune q, Bufblock *buf) +{ + if(q == '\'') /* copy quoted string */ + return copysingle(s, buf); + + if(q != '`') /* not quoted */ + return s; + + while(*s){ /* copy backquoted string */ + s += chartorune(&q, s); + rinsert(buf, q); + if(q == '}') + break; + if(q == '\'') + s = copysingle(s, buf); /* copy quoted string */ + } + return s; +} + +static int +rcmatchname(char *name) +{ + char *p; + + if((p = strrchr(name, '/')) != nil) + name = p+1; + if(name[0] == 'r' && name[1] == 'c') + return 1; + return 0; +} + +Shell rcshell = { + "rc", + "'= \t", + '\1', + rccharin, + rcexpandquote, + rcescapetoken, + rccopyq, + rcmatchname +}; diff --git a/mk/mk/recipe.c b/mk/mk/recipe.c @@ -0,0 +1,117 @@ +#include "mk.h" + +int +dorecipe(Node *node) +{ + char buf[BIGBLOCK]; + register Node *n; + Rule *r = 0; + Arc *a, *aa; + Word head, ahead, lp, ln, *w, *ww, *aw; + Symtab *s; + int did = 0; + + aa = 0; + /* + pick up the rule + */ + for(a = node->prereqs; a; a = a->next) + if(*a->r->recipe) + r = (aa = a)->r; + /* + no recipe? go to buggery! 
+ */ + if(r == 0){ + if(!(node->flags&VIRTUAL) && !(node->flags&NORECIPE)){ + fprint(2, "mk: no recipe to make '%s'\n", node->name); + Exit(); + } + if(strchr(node->name, '(') && node->time == 0) + MADESET(node, MADE); + else + update(0, node); + if(tflag){ + if(!(node->flags&VIRTUAL)) + touch(node->name); + else if(explain) + Bprint(&bout, "no touch of virtual '%s'\n", node->name); + } + return(did); + } + /* + build the node list + */ + node->next = 0; + head.next = 0; + ww = &head; + ahead.next = 0; + aw = &ahead; + if(r->attr&REGEXP){ + ww->next = newword(node->name); + aw->next = newword(node->name); + } else { + for(w = r->alltargets; w; w = w->next){ + if(r->attr&META) + subst(aa->stem, w->s, buf); + else + strcpy(buf, w->s); + aw->next = newword(buf); + aw = aw->next; + if((s = symlook(buf, S_NODE, 0)) == 0) + continue; /* not a node we are interested in */ + n = s->u.ptr; + if(aflag == 0 && n->time) { + for(a = n->prereqs; a; a = a->next) + if(a->n && outofdate(n, a, 0)) + break; + if(a == 0) + continue; + } + ww->next = newword(buf); + ww = ww->next; + if(n == node) continue; + n->next = node->next; + node->next = n; + } + } + for(n = node; n; n = n->next) + if((n->flags&READY) == 0) + return(did); + /* + gather the params for the job + */ + lp.next = ln.next = 0; + for(n = node; n; n = n->next){ + for(a = n->prereqs; a; a = a->next){ + if(a->n){ + addw(&lp, a->n->name); + if(outofdate(n, a, 0)){ + addw(&ln, a->n->name); + if(explain) + fprint(1, "%s(%ld) < %s(%ld)\n", + n->name, n->time, a->n->name, a->n->time); + } + } else { + if(explain) + fprint(1, "%s has no prerequisites\n", + n->name); + } + } + MADESET(n, BEINGMADE); + } + /*print("lt=%s ln=%s lp=%s\n",wtos(head.next, ' '),wtos(ln.next, ' '),wtos(lp.next, ' '));*/ + run(newjob(r, node, aa->stem, aa->match, lp.next, ln.next, head.next, ahead.next)); + return(1); +} + +void +addw(Word *w, char *s) +{ + Word *lw; + + for(lw = w; w = w->next; lw = w){ + if(strcmp(s, w->s) == 0) + return; + } + 
lw->next = newword(s); +} diff --git a/mk/mk/rule.c b/mk/mk/rule.c @@ -0,0 +1,112 @@ +#include "mk.h" + +static Rule *lr, *lmr; +static int rcmp(Rule *r, char *target, Word *tail); +static int nrules = 0; + +void +addrule(char *head, Word *tail, char *body, Word *ahead, int attr, int hline, char *prog) +{ + Rule *r; + Rule *rr; + Symtab *sym; + int reuse; + + r = 0; + reuse = 0; + if(sym = symlook(head, S_TARGET, 0)){ + for(r = sym->u.ptr; r; r = r->chain) + if(rcmp(r, head, tail) == 0){ + reuse = 1; + break; + } + } + if(r == 0) + r = (Rule *)Malloc(sizeof(Rule)); + r->shellt = shellt; + r->shellcmd = shellcmd; + r->target = head; + r->tail = tail; + r->recipe = body; + r->line = hline; + r->file = infile; + r->attr = attr; + r->alltargets = ahead; + r->prog = prog; + r->rule = nrules++; + if(!reuse){ + rr = symlook(head, S_TARGET, (void *)r)->u.ptr; + if(rr != r){ + r->chain = rr->chain; + rr->chain = r; + } else + r->chain = 0; + } + if(!reuse) + r->next = 0; + if((attr&REGEXP) || shellt->charin(head, "%&")){ + r->attr |= META; + if(reuse) + return; + if(attr&REGEXP){ + patrule = r; + r->pat = regcomp(head); + } + if(metarules == 0) + metarules = lmr = r; + else { + lmr->next = r; + lmr = r; + } + } else { + if(reuse) + return; + r->pat = 0; + if(rules == 0) + rules = lr = r; + else { + lr->next = r; + lr = r; + } + } +} + +void +dumpr(char *s, Rule *r) +{ + if(r == nil) + return; + Bprint(&bout, "%s: start=%ld shelltype=%s shellcmd=%s\n", + s, r, r->shellt->name, wtos(r->shellcmd, ' ')); + for(; r; r = r->next){ + Bprint(&bout, "\tRule %ld: %s[%d] attr=%x next=%ld chain=%ld alltarget='%s'", + r, r->file, r->line, r->attr, r->next, r->chain, wtos(r->alltargets, ' ')); + if(r->prog) + Bprint(&bout, " prog='%s'", r->prog); + Bprint(&bout, "\n\ttarget=%s: %s\n", r->target, wtos(r->tail, ' ')); + Bprint(&bout, "\trecipe@%ld='%s'\n", r->recipe, r->recipe); + } +} + +static int +rcmp(Rule *r, char *target, Word *tail) +{ + Word *w; + + if(strcmp(r->target, target)) + 
return 1; + for(w = r->tail; w && tail; w = w->next, tail = tail->next) + if(strcmp(w->s, tail->s)) + return 1; + return(w || tail); +} + +char * +rulecnt(void) +{ + char *s; + + s = Malloc(nrules); + memset(s, 0, nrules); + return(s); +} diff --git a/mk/mk/run.c b/mk/mk/run.c @@ -0,0 +1,296 @@ +#include "mk.h" + +typedef struct Event +{ + int pid; + Job *job; +} Event; +static Event *events; +static int nevents, nrunning, nproclimit; + +typedef struct Process +{ + int pid; + int status; + struct Process *b, *f; +} Process; +static Process *phead, *pfree; +static void sched(void); +static void pnew(int, int), pdelete(Process *); + +int pidslot(int); + +void +run(Job *j) +{ + Job *jj; + + if(jobs){ + for(jj = jobs; jj->next; jj = jj->next) + ; + jj->next = j; + } else + jobs = j; + j->next = 0; + /* this code also in waitup after parse redirect */ + if(nrunning < nproclimit) + sched(); +} + +static void +sched(void) +{ + char *flags; + Job *j; + Bufblock *buf; + int slot; + Node *n; + Envy *e; + + if(jobs == 0){ + usage(); + return; + } + j = jobs; + jobs = j->next; + if(DEBUG(D_EXEC)) + fprint(1, "firing up job for target %s\n", wtos(j->t, ' ')); + slot = nextslot(); + events[slot].job = j; + buf = newbuf(); + e = buildenv(j, slot); + shprint(j->r->recipe, e, buf, j->r->shellt); + if(!tflag && (nflag || !(j->r->attr&QUIET))) + Bwrite(&bout, buf->start, (long)strlen(buf->start)); + freebuf(buf); + if(nflag||tflag){ + for(n = j->n; n; n = n->next){ + if(tflag){ + if(!(n->flags&VIRTUAL)) + touch(n->name); + else if(explain) + Bprint(&bout, "no touch of virtual '%s'\n", n->name); + } + n->time = time((long *)0); + MADESET(n, MADE); + } + } else { + if(DEBUG(D_EXEC)) + fprint(1, "recipe='%s'", j->r->recipe);/**/ + Bflush(&bout); + if(j->r->attr&NOMINUSE) + flags = 0; + else + flags = "-e"; + events[slot].pid = execsh(flags, j->r->recipe, 0, e, j->r->shellt, j->r->shellcmd); + usage(); + nrunning++; + if(DEBUG(D_EXEC)) + fprint(1, "pid for target %s = %d\n", wtos(j->t, ' 
'), events[slot].pid); + } +} + +int +waitup(int echildok, int *retstatus) +{ + Envy *e; + int pid; + int slot; + Symtab *s; + Word *w; + Job *j; + char buf[ERRMAX]; + Bufblock *bp; + int uarg = 0; + int done; + Node *n; + Process *p; + extern int runerrs; + + /* first check against the proces slist */ + if(retstatus) + for(p = phead; p; p = p->f) + if(p->pid == *retstatus){ + *retstatus = p->status; + pdelete(p); + return(-1); + } +again: /* rogue processes */ + pid = waitfor(buf); + if(pid == -1){ + if(echildok > 0) + return(1); + else { + fprint(2, "mk: (waitup %d): %r\n", echildok); + Exit(); + } + } + if(DEBUG(D_EXEC)) + fprint(1, "waitup got pid=%d, status='%s'\n", pid, buf); + if(retstatus && pid == *retstatus){ + *retstatus = buf[0]? 1:0; + return(-1); + } + slot = pidslot(pid); + if(slot < 0){ + if(DEBUG(D_EXEC)) + fprint(2, "mk: wait returned unexpected process %d\n", pid); + pnew(pid, buf[0]? 1:0); + goto again; + } + j = events[slot].job; + usage(); + nrunning--; + events[slot].pid = -1; + if(buf[0]){ + e = buildenv(j, slot); + bp = newbuf(); + shprint(j->r->recipe, e, bp, j->r->shellt); + front(bp->start); + fprint(2, "mk: %s: exit status=%s", bp->start, buf); + freebuf(bp); + for(n = j->n, done = 0; n; n = n->next) + if(n->flags&DELETE){ + if(done++ == 0) + fprint(2, ", deleting"); + fprint(2, " '%s'", n->name); + delete(n->name); + } + fprint(2, "\n"); + if(kflag){ + runerrs++; + uarg = 1; + } else { + jobs = 0; + Exit(); + } + } + for(w = j->t; w; w = w->next){ + if((s = symlook(w->s, S_NODE, 0)) == 0) + continue; /* not interested in this node */ + update(uarg, s->u.ptr); + } + if(nrunning < nproclimit) + sched(); + return(0); +} + +void +nproc(void) +{ + Symtab *sym; + Word *w; + + if(sym = symlook("NPROC", S_VAR, 0)) { + w = sym->u.ptr; + if (w && w->s && w->s[0]) + nproclimit = atoi(w->s); + } + if(nproclimit < 1) + nproclimit = 1; + if(DEBUG(D_EXEC)) + fprint(1, "nprocs = %d\n", nproclimit); + if(nproclimit > nevents){ + if(nevents) + events = 
(Event *)Realloc((char *)events, nproclimit*sizeof(Event)); + else + events = (Event *)Malloc(nproclimit*sizeof(Event)); + while(nevents < nproclimit) + events[nevents++].pid = 0; + } +} + +int +nextslot(void) +{ + int i; + + for(i = 0; i < nproclimit; i++) + if(events[i].pid <= 0) return i; + assert("out of slots!!", 0); + return 0; /* cyntax */ +} + +int +pidslot(int pid) +{ + int i; + + for(i = 0; i < nevents; i++) + if(events[i].pid == pid) return(i); + if(DEBUG(D_EXEC)) + fprint(2, "mk: wait returned unexpected process %d\n", pid); + return(-1); +} + + +static void +pnew(int pid, int status) +{ + Process *p; + + if(pfree){ + p = pfree; + pfree = p->f; + } else + p = (Process *)Malloc(sizeof(Process)); + p->pid = pid; + p->status = status; + p->f = phead; + phead = p; + if(p->f) + p->f->b = p; + p->b = 0; +} + +static void +pdelete(Process *p) +{ + if(p->f) + p->f->b = p->b; + if(p->b) + p->b->f = p->f; + else + phead = p->f; + p->f = pfree; + pfree = p; +} + +void +killchildren(char *msg) +{ + Process *p; + + kflag = 1; /* to make sure waitup doesn't exit */ + jobs = 0; /* make sure no more get scheduled */ + for(p = phead; p; p = p->f) + expunge(p->pid, msg); + while(waitup(1, (int *)0) == 0) + ; + Bprint(&bout, "mk: %s\n", msg); + Exit(); +} + +static long tslot[1000]; +static long tick; + +void +usage(void) +{ + long t; + + time(&t); + if(tick) + tslot[nrunning] += (t-tick); + tick = t; +} + +void +prusage(void) +{ + int i; + + usage(); + for(i = 0; i <= nevents; i++) + fprint(1, "%d: %ld\n", i, tslot[i]); +} diff --git a/mk/mk/sh.c b/mk/mk/sh.c @@ -0,0 +1,206 @@ +#include "mk.h" + +/* + * This file contains functions that depend on the shell's syntax. Most + * of the routines extract strings observing the shell's escape conventions. + */ + + +/* + * skip a token in quotes. 
+ */ +static char * +squote(char *cp, int c) +{ + Rune r; + int n; + + while(*cp){ + n = chartorune(&r, cp); + if(r == c) + return cp; + if(r == '\\') + n += chartorune(&r, cp+n); + cp += n; + } + SYNERR(-1); /* should never occur */ + fprint(2, "missing closing '\n"); + return 0; +} +/* + * search a string for unescaped characters in a pattern set + */ +static char * +shcharin(char *cp, char *pat) +{ + Rune r; + int n, vargen; + + vargen = 0; + while(*cp){ + n = chartorune(&r, cp); + switch(r){ + case '\\': /* skip escaped char */ + cp += n; + n = chartorune(&r, cp); + break; + case '\'': /* skip quoted string */ + case '"': + cp = squote(cp+1, r); /* n must = 1 */ + if(!cp) + return 0; + break; + case '$': + if(*(cp+1) == '{') + vargen = 1; + break; + case '}': + if(vargen) + vargen = 0; + else if(utfrune(pat, r)) + return cp; + break; + default: + if(vargen == 0 && utfrune(pat, r)) + return cp; + break; + } + cp += n; + } + if(vargen){ + SYNERR(-1); + fprint(2, "missing closing } in pattern generator\n"); + } + return 0; +} + +/* + * extract an escaped token. Possible escape chars are single-quote, + * double-quote,and backslash. + */ +static char* +shexpandquote(char *s, Rune esc, Bufblock *b) +{ + Rune r; + + if (esc == '\\') { + s += chartorune(&r, s); + rinsert(b, r); + return s; + } + + while(*s){ + s += chartorune(&r, s); + if(r == esc) + return s; + if (r == '\\') { + rinsert(b, r); + s += chartorune(&r, s); + } + rinsert(b, r); + } + return 0; +} + +/* + * Input an escaped token. Possible escape chars are single-quote, + * double-quote and backslash. 
+ */ +static int +shescapetoken(Biobuf *bp, Bufblock *buf, int preserve, int esc) +{ /* returns 1 once the token has been copied into buf, 0 after reporting a missing closing quote */ + int c, line; + + if(esc == '\\') { /* backslash: copy the following rune as-is */ + c = Bgetrune(bp); + if(c == '\r') /* a CR is dropped; take the rune after it */ + c = Bgetrune(bp); + if (c == '\n') /* keep the line counter in step */ + mkinline++; + rinsert(buf, c); + return 1; + } + + line = mkinline; /* line where the quote opened, reported on error */ + while((c = nextrune(bp, 0)) >= 0){ + if(c == esc){ /* closing quote: copied only when preserve is set */ + if(preserve) + rinsert(buf, c); + return 1; + } + if(c == '\\') { /* keep both the backslash and the rune it escapes */ + rinsert(buf, c); + c = Bgetrune(bp); + if(c == '\r') + c = Bgetrune(bp); + if (c < 0) /* Bgetrune failed, presumably end of input */ + break; + if (c == '\n') + mkinline++; + } + rinsert(buf, c); + } + SYNERR(line); fprint(2, "missing closing %c\n", esc); /* reached only when no closing quote was read */ + return 0; +} + +/* + * copy a quoted string; s points to char after opening quote. + * runes are copied into buf up to and including the closing quote q; + * returns the position after q, or the end of s if q never appears. + */ +static char * +copysingle(char *s, Rune q, Bufblock *buf) +{ + Rune r; + + while(*s){ + s += chartorune(&r, s); + rinsert(buf, r); + if(r == q) + break; + } + return s; +} +/* + * check for quoted strings. backquotes are handled here; single and + * double quotes are handed to copysingle above. + * s points to char after opening quote, q. + * returns the position after the copied text, or s unchanged when q + * is not a quote character. + */ +static char * +shcopyq(char *s, Rune q, Bufblock *buf) +{ + if(q == '\'' || q == '"') /* copy quoted string */ + return copysingle(s, q, buf); + + if(q != '`') /* not quoted */ + return s; + + while(*s){ /* copy backquoted string */ + s += chartorune(&q, s); + rinsert(buf, q); + if(q == '`') /* closing backquote has been copied */ + break; + if(q == '\'' || q == '"') + s = copysingle(s, q, buf); /* copy quoted string */ + } + return s; +} +/* sh accepts every shell name: the argument is ignored and 1 is always returned */ +static int +shmatchname(char *name) +{ + USED(name); + + return 1; +} + +/* operations table for sh; the entries follow the field order of struct Shell in mk.h */ +Shell shshell = { + "sh", /* name */ + "\"'= \t", /* termchars: used in parse.c to isolate assignment attribute */ + ' ', /* iws: inter-word separator in env */ + shcharin, /* charin */ + shexpandquote, /* expandquote */ + shescapetoken, /* escapetoken */ + shcopyq, /* copyq */ + shmatchname /* matchname */ +}; + diff --git a/mk/mk/shell.c b/mk/mk/shell.c @@ -0,0 +1,80 @@ +#include "mk.h" + +static Shell *shells[] = { /* setshell tries these in order and takes the first whose matchname accepts the name */ + &rcshell, + &shshell /* shmatchname accepts anything, so sh is the catch-all */ +}; + +Shell *shelldefault = &shshell; /* shell installed by initshell */ + +Shell *shellt; /* shell currently in effect */ +Word *shellcmd; /* word list it was selected from (the MKSHELL value) */ + +typedef struct Shellstack Shellstack; +struct Shellstack /* one saved (shellt, shellcmd) pair; pushshell saves it and popshell restores it */ +{ + Shell *t; + Word *w; + Shellstack *next; +}; + 
+Shellstack *shellstack; + +char* +setshell(Word *w) +{ + int i; + + if(w->s == nil) + return "shell name not found on line"; + + for(i=0; i<nelem(shells); i++) + if(shells[i]->matchname(w->s)) + break; + if(i == nelem(shells)) + return "cannot determine shell type"; + shellt = shells[i]; + shellcmd = w; + return nil; +} + +void +initshell(void) +{ + shellcmd = stow(shelldefault->name); + shellt = shelldefault; + setvar("MKSHELL", shellcmd); +} + +void +pushshell(void) +{ + Shellstack *s; + + /* save */ + s = Malloc(sizeof *s); + s->t = shellt; + s->w = shellcmd; + s->next = shellstack; + shellstack = s; + + initshell(); /* reset to defaults */ +} + +void +popshell(void) +{ + Shellstack *s; + + if(shellstack == nil){ + fprint(2, "internal shellstack error\n"); + Exit(); + } + + s = shellstack; + shellstack = s->next; + shellt = s->t; + shellcmd = s->w; + setvar("MKSHELL", shellcmd); + free(s); +} diff --git a/mk/mk/shprint.c b/mk/mk/shprint.c @@ -0,0 +1,125 @@ +#include "mk.h" + +static char *vexpand(char*, Envy*, Bufblock*); + +#define getfields mkgetfields + +static int +getfields(char *str, char **args, int max, int mflag, char *set) +{ + Rune r; + int nr, intok, narg; + + if(max <= 0) + return 0; + + narg = 0; + args[narg] = str; + if(!mflag) + narg++; + intok = 0; + for(;; str += nr) { + nr = chartorune(&r, str); + if(r == 0) + break; + if(utfrune(set, r)) { + if(narg >= max) + break; + *str = 0; + intok = 0; + args[narg] = str + nr; + if(!mflag) + narg++; + } else { + if(!intok && mflag) + narg++; + intok = 1; + } + } + return narg; +} + +void +shprint(char *s, Envy *env, Bufblock *buf, Shell *sh) +{ + int n; + Rune r; + + while(*s) { + n = chartorune(&r, s); + if (r == '$') + s = vexpand(s, env, buf); + else { + rinsert(buf, r); + s += n; + s = sh->copyq(s, r, buf); /*handle quoted strings*/ + } + } + insert(buf, 0); +} + +static char * +mygetenv(char *name, Envy *env) +{ + if (!env) + return 0; + if (symlook(name, S_WESET, 0) == 0 && symlook(name, 
S_INTERNAL, 0) == 0) + return 0; + /* only resolve internal variables and variables we've set */ + for(; env->name; env++){ + if (strcmp(env->name, name) == 0) + return wtos(env->values, ' '); + } + return 0; +} + +static char * +vexpand(char *w, Envy *env, Bufblock *buf) +{ + char *s, carry, *p, *q; + + assert("vexpand no $", *w == '$'); + p = w+1; /* skip dollar sign */ + if(*p == '{') { + p++; + q = utfrune(p, '}'); + if (!q) + q = strchr(p, 0); + } else + q = shname(p); + carry = *q; + *q = 0; + s = mygetenv(p, env); + *q = carry; + if (carry == '}') + q++; + if (s) { + bufcpy(buf, s, strlen(s)); + free(s); + } else /* copy name intact*/ + bufcpy(buf, w, q-w); + return(q); +} + +void +front(char *s) +{ + char *t, *q; + int i, j; + char *flds[512]; + + q = strdup(s); + i = getfields(q, flds, 512, 0, " \t\n"); + if(i > 5){ + flds[4] = flds[i-1]; + flds[3] = "..."; + i = 5; + } + t = s; + for(j = 0; j < i; j++){ + for(s = flds[j]; *s; *t++ = *s++); + *t++ = ' '; + } + *t = 0; + free(q); +} diff --git a/mk/mk/symtab.c b/mk/mk/symtab.c @@ -0,0 +1,97 @@ +#include "mk.h" + +#define NHASH 4099 +#define HASHMUL 79L /* this is a good value */ +static Symtab *hash[NHASH]; + +void +syminit(void) +{ + Symtab **s, *ss, *next; + + for(s = hash; s < &hash[NHASH]; s++){ + for(ss = *s; ss; ss = next){ + next = ss->next; + free((char *)ss); + } + *s = 0; + } +} + +Symtab * +symlook(char *sym, int space, void *install) +{ + long h; + char *p; + Symtab *s; + + for(p = sym, h = space; *p; h += *p++) + h *= HASHMUL; + if(h < 0) + h = ~h; + h %= NHASH; + for(s = hash[h]; s; s = s->next) + if((s->space == space) && (strcmp(s->name, sym) == 0)) + return(s); + if(install == 0) + return(0); + s = (Symtab *)Malloc(sizeof(Symtab)); + s->space = space; + s->name = sym; + s->u.ptr = install; + s->next = hash[h]; + hash[h] = s; + return(s); +} + +void +symdel(char *sym, int space) +{ + long h; + char *p; + Symtab *s, *ls; + + /* multiple memory leaks */ + + for(p = sym, h = space; *p; h += 
*p++) + h *= HASHMUL; + if(h < 0) + h = ~h; + h %= NHASH; + for(s = hash[h], ls = 0; s; ls = s, s = s->next) + if((s->space == space) && (strcmp(s->name, sym) == 0)){ + if(ls) + ls->next = s->next; + else + hash[h] = s->next; + free((char *)s); + } +} + +void +symtraverse(int space, void (*fn)(Symtab*)) +{ + Symtab **s, *ss; + + for(s = hash; s < &hash[NHASH]; s++) + for(ss = *s; ss; ss = ss->next) + if(ss->space == space) + (*fn)(ss); +} + +void +symstat(void) +{ + Symtab **s, *ss; + int n; + int l[1000]; + + memset((char *)l, 0, sizeof(l)); + for(s = hash; s < &hash[NHASH]; s++){ + for(ss = *s, n = 0; ss; ss = ss->next) + n++; + l[n]++; + } + for(n = 0; n < 1000; n++) + if(l[n]) Bprint(&bout, "%ld of length %d\n", l[n], n); +} diff --git a/mk/mk/sys.h b/mk/mk/sys.h @@ -0,0 +1,27 @@ +#include <utf.h> +#include <fmt.h> +#include <bio.h> +#include <regexp9.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <ctype.h> +#include <time.h> +#include <stdint.h> + +#define OREAD O_RDONLY +#define OWRITE O_WRONLY +#define ORDWR O_RDWR +#define nil 0 +#define nelem(x) (sizeof(x)/sizeof((x)[0])) +#define seek lseek +#define remove unlink +#define exits(x) exit(x && *(char*)x ? 
1 : 0) +#define USED(x) if(x){}else +#define create(name, mode, perm) open(name, mode|O_CREAT, perm) +#define ERRMAX 256 + +typedef uintptr_t uintptr; +#define uchar mk_uchar +typedef unsigned char uchar; diff --git a/mk/mk/unix.c b/mk/mk/unix.c @@ -0,0 +1,341 @@ +#define NOPLAN9DEFINES +#include "mk.h" +#include <sys/wait.h> +#include <signal.h> +#include <sys/stat.h> +#include <sys/time.h> + +char *shell = "/bin/sh"; +char *shellname = "sh"; + +extern char **environ; + +static void +mkperror(char *s) +{ + fprint(2, "%s: %r\n", s); +} + +void +readenv(void) +{ + char **p, *s; + Word *w; + + for(p = environ; *p; p++){ +/* rsc 5/5/2004 -- This misparses fn#cd={whatever} + s = shname(*p); + if(*s == '=') { + *s = 0; + w = newword(s+1); + } else + w = newword(""); +*/ + s = strchr(*p, '='); + if(s){ + *s = 0; + w = newword(s+1); + } else + w = newword(""); + if (symlook(*p, S_INTERNAL, 0)) + continue; + s = strdup(*p); + setvar(s, (void *)w); + symlook(s, S_EXPORTED, (void*)"")->u.ptr = ""; + } +} + +/* + * done on child side of fork, so parent's env is not affected + * and we don't care about freeing memory because we're going + * to exec immediately after this. 
+ */ +void +exportenv(Envy *e, Shell *sh) +{ + int i; + char **p; + static char buf[16384]; + + p = 0; + for(i = 0; e->name; e++, i++) { + p = (char**) Realloc(p, (i+2)*sizeof(char*)); + if(e->values) + snprint(buf, sizeof buf, "%s=%s", e->name, wtos(e->values, sh->iws)); + else + snprint(buf, sizeof buf, "%s=", e->name); + p[i] = strdup(buf); + } + p[i] = 0; + environ = p; +} + +int +waitfor(char *msg) +{ + int status; + int pid; + + *msg = 0; + pid = wait(&status); + if(pid > 0) { + if(status&0x7f) { + if(status&0x80) + snprint(msg, ERRMAX, "signal %d, core dumped", status&0x7f); + else + snprint(msg, ERRMAX, "signal %d", status&0x7f); + } else if(status&0xff00) + snprint(msg, ERRMAX, "exit(%d)", (status>>8)&0xff); + } + return pid; +} + +void +expunge(int pid, char *msg) +{ + if(strcmp(msg, "interrupt")) + kill(pid, SIGINT); + else + kill(pid, SIGHUP); +} + +int mypid; + +int +shargv(Word *cmd, int extra, char ***pargv) +{ + char **argv; + int i, n; + Word *w; + + n = 0; + for(w=cmd; w; w=w->next) + n++; + + argv = Malloc((n+extra+1)*sizeof(argv[0])); + i = 0; + for(w=cmd; w; w=w->next) + argv[i++] = w->s; + argv[n] = 0; + *pargv = argv; + return n; +} + +int +execsh(char *args, char *cmd, Bufblock *buf, Envy *e, Shell *sh, Word *shellcmd) +{ + char *p, **argv; + int tot, n, pid, in[2], out[2]; + + if(buf && pipe(out) < 0){ + mkperror("pipe"); + Exit(); + } + pid = fork(); + mypid = getpid(); + if(pid < 0){ + mkperror("mk fork"); + Exit(); + } + if(pid == 0){ + if(buf) + close(out[0]); + if(pipe(in) < 0){ + mkperror("pipe"); + Exit(); + } + pid = fork(); + if(pid < 0){ + mkperror("mk fork"); + Exit(); + } + if(pid != 0){ + dup2(in[0], 0); + if(buf){ + dup2(out[1], 1); + close(out[1]); + } + close(in[0]); + close(in[1]); + if (e) + exportenv(e, sh); + n = shargv(shellcmd, 1, &argv); + argv[n++] = args; + argv[n] = 0; + execvp(argv[0], argv); + mkperror(shell); + _exit(1); + } + close(out[1]); + close(in[0]); + if(DEBUG(D_EXEC)) + fprint(1, "starting: %s\n", cmd); 
+ p = cmd+strlen(cmd); + while(cmd < p){ + n = write(in[1], cmd, p-cmd); + if(n < 0) + break; + cmd += n; + } + close(in[1]); + _exit(0); + } + if(buf){ + close(out[1]); + tot = 0; + for(;;){ + if (buf->current >= buf->end) + growbuf(buf); + n = read(out[0], buf->current, buf->end-buf->current); + if(n <= 0) + break; + buf->current += n; + tot += n; + } + if (tot && buf->current[-1] == '\n') + buf->current--; + close(out[0]); + } + return pid; +} + +int +pipecmd(char *cmd, Envy *e, int *fd, Shell *sh, Word *shellcmd) +{ + int pid, pfd[2]; + int n; + char **argv; + + if(DEBUG(D_EXEC)) + fprint(1, "pipecmd='%s'\n", cmd);/**/ + + if(fd && pipe(pfd) < 0){ + mkperror("pipe"); + Exit(); + } + pid = fork(); + if(pid < 0){ + mkperror("mk fork"); + Exit(); + } + if(pid == 0){ + if(fd){ + close(pfd[0]); + dup2(pfd[1], 1); + close(pfd[1]); + } + if(e) + exportenv(e, sh); + n = shargv(shellcmd, 2, &argv); + argv[n++] = "-c"; + argv[n++] = cmd; + argv[n] = 0; + execvp(argv[0], argv); + mkperror(shell); + _exit(1); + } + if(fd){ + close(pfd[1]); + *fd = pfd[0]; + } + return pid; +} + +void +Exit(void) +{ + while(wait(0) >= 0) + ; + exits("error"); +} + +static struct +{ + int sig; + char *msg; +} sigmsgs[] = +{ + SIGALRM, "alarm", + SIGFPE, "sys: fp: fptrap", + SIGPIPE, "sys: write on closed pipe", + SIGILL, "sys: trap: illegal instruction", +/* SIGSEGV, "sys: segmentation violation", */ + 0, 0 +}; + +static void +notifyf(int sig) +{ + int i; + + for(i = 0; sigmsgs[i].msg; i++) + if(sigmsgs[i].sig == sig) + killchildren(sigmsgs[i].msg); + + /* should never happen */ + signal(sig, SIG_DFL); + kill(getpid(), sig); +} + +void +catchnotes(void) +{ + int i; + + for(i = 0; sigmsgs[i].msg; i++) + signal(sigmsgs[i].sig, notifyf); +} + +char* +maketmp(int *pfd) +{ + static char temp[] = "/tmp/mkargXXXXXX"; + static char buf[100]; + int fd; + + strcpy(buf, temp); + fd = mkstemp(buf); + if(fd < 0) + return 0; + *pfd = fd; + return buf; +} + +int +chgtime(char *name) +{ + if(access(name, 0) 
>= 0) + return utimes(name, 0); + return close(creat(name, 0666)); +} + +void +rcopy(char **to, Resub *match, int n) +{ + int c; + char *p; + + *to = match->s.sp; /* stem0 matches complete target */ + for(to++, match++; --n > 0; to++, match++){ + if(match->s.sp && match->e.ep){ + p = match->e.ep; + c = *p; + *p = 0; + *to = strdup(match->s.sp); + *p = c; + } + else + *to = 0; + } +} + +unsigned long +mkmtime(char *name) +{ + struct stat st; + + if(stat(name, &st) < 0) + return 0; + + return st.st_mtime; +} diff --git a/mk/mk/var.c b/mk/mk/var.c @@ -0,0 +1,41 @@ +#include "mk.h" + +void +setvar(char *name, void *ptr) +{ + symlook(name, S_VAR, ptr)->u.ptr = ptr; + symlook(name, S_MAKEVAR, (void*)""); +} + +static void +print1(Symtab *s) +{ + Word *w; + + Bprint(&bout, "\t%s=", s->name); + for (w = s->u.ptr; w; w = w->next) + Bprint(&bout, "'%s'", w->s); + Bprint(&bout, "\n"); +} + +void +dumpv(char *s) +{ + Bprint(&bout, "%s:\n", s); + symtraverse(S_VAR, print1); +} + +char * +shname(char *a) +{ + Rune r; + int n; + + while (*a) { + n = chartorune(&r, a); + if (!WORDCHR(r)) + break; + a += n; + } + return a; +} diff --git a/mk/mk/varsub.c b/mk/mk/varsub.c @@ -0,0 +1,252 @@ +#include "mk.h" + +static Word *subsub(Word*, char*, char*); +static Word *expandvar(char**); +static Bufblock *varname(char**); +static Word *extractpat(char*, char**, char*, char*); +static int submatch(char*, Word*, Word*, int*, char**); +static Word *varmatch(char *); + +Word * +varsub(char **s) +{ + Bufblock *b; + Word *w; + + if(**s == '{') /* either ${name} or ${name: A%B==C%D}*/ + return expandvar(s); + + b = varname(s); + if(b == 0) + return 0; + + w = varmatch(b->start); + freebuf(b); + return w; +} + +/* + * extract a variable name + */ +static Bufblock* +varname(char **s) +{ + Bufblock *b; + char *cp; + Rune r; + int n; + + b = newbuf(); + cp = *s; + for(;;){ + n = chartorune(&r, cp); + if (!WORDCHR(r)) + break; + rinsert(b, r); + cp += n; + } + if (b->current == b->start){ + 
SYNERR(-1); + fprint(2, "missing variable name <%s>\n", *s); + freebuf(b); + return 0; + } + *s = cp; + insert(b, 0); + return b; +} + +static Word* +varmatch(char *name) +{ + Word *w; + Symtab *sym; + + sym = symlook(name, S_VAR, 0); + if(sym){ + /* check for at least one non-NULL value */ + for (w = sym->u.ptr; w; w = w->next) + if(w->s && *w->s) + return wdup(w); + } + return 0; +} + +static Word* +expandvar(char **s) +{ + Word *w; + Bufblock *buf; + Symtab *sym; + char *cp, *begin, *end; + + begin = *s; + (*s)++; /* skip the '{' */ + buf = varname(s); + if (buf == 0) + return 0; + cp = *s; + if (*cp == '}') { /* ${name} variant*/ + (*s)++; /* skip the '}' */ + w = varmatch(buf->start); + freebuf(buf); + return w; + } + if (*cp != ':') { + SYNERR(-1); + fprint(2, "bad variable name <%s>\n", buf->start); + freebuf(buf); + return 0; + } + cp++; + end = shellt->charin(cp , "}"); + if(end == 0){ + SYNERR(-1); + fprint(2, "missing '}': %s\n", begin); + Exit(); + } + *end = 0; + *s = end+1; + + sym = symlook(buf->start, S_VAR, 0); + if(sym == 0 || sym->u.ptr == 0) + w = newword(buf->start); + else + w = subsub(sym->u.ptr, cp, end); + freebuf(buf); + return w; +} + +static Word* +extractpat(char *s, char **r, char *term, char *end) +{ + int save; + char *cp; + Word *w; + + cp = shellt->charin(s, term); + if(cp){ + *r = cp; + if(cp == s) + return 0; + save = *cp; + *cp = 0; + w = stow(s); + *cp = save; + } else { + *r = end; + w = stow(s); + } + return w; +} + +static Word* +subsub(Word *v, char *s, char *end) +{ + int nmid; + Word *head, *tail, *w, *h; + Word *a, *b, *c, *d; + Bufblock *buf; + char *cp, *enda; + + a = extractpat(s, &cp, "=%&", end); + b = c = d = 0; + if(PERCENT(*cp)) + b = extractpat(cp+1, &cp, "=", end); + if(*cp == '=') + c = extractpat(cp+1, &cp, "&%", end); + if(PERCENT(*cp)) + d = stow(cp+1); + else if(*cp) + d = stow(cp); + + head = tail = 0; + buf = newbuf(); + for(; v; v = v->next){ + h = w = 0; + if(submatch(v->s, a, b, &nmid, &enda)){ + /* 
enda points to end of A match in source; + * nmid = number of chars between end of A and start of B + */ + if(c){ + h = w = wdup(c); + while(w->next) + w = w->next; + } + if(PERCENT(*cp) && nmid > 0){ + if(w){ + bufcpy(buf, w->s, strlen(w->s)); + bufcpy(buf, enda, nmid); + insert(buf, 0); + free(w->s); + w->s = strdup(buf->start); + } else { + bufcpy(buf, enda, nmid); + insert(buf, 0); + h = w = newword(buf->start); + } + buf->current = buf->start; + } + if(d && *d->s){ + if(w){ + + bufcpy(buf, w->s, strlen(w->s)); + bufcpy(buf, d->s, strlen(d->s)); + insert(buf, 0); + free(w->s); + w->s = strdup(buf->start); + w->next = wdup(d->next); + while(w->next) + w = w->next; + buf->current = buf->start; + } else + h = w = wdup(d); + } + } + if(w == 0) + h = w = newword(v->s); + + if(head == 0) + head = h; + else + tail->next = h; + tail = w; + } + freebuf(buf); + delword(a); + delword(b); + delword(c); + delword(d); + return head; +} + +static int +submatch(char *s, Word *a, Word *b, int *nmid, char **enda) +{ + Word *w; + int n; + char *end; + + n = 0; + for(w = a; w; w = w->next){ + n = strlen(w->s); + if(strncmp(s, w->s, n) == 0) + break; + } + if(a && w == 0) /* a == NULL matches everything*/ + return 0; + + *enda = s+n; /* pointer to end a A part match */ + *nmid = strlen(s)-n; /* size of remainder of source */ + end = *enda+*nmid; + for(w = b; w; w = w->next){ + n = strlen(w->s); + if(strcmp(w->s, end-n) == 0){ + *nmid -= n; + break; + } + } + if(b && w == 0) /* b == NULL matches everything */ + return 0; + return 1; +} diff --git a/mk/mk/word.c b/mk/mk/word.c @@ -0,0 +1,189 @@ +#include "mk.h" + +static Word *nextword(char**); + +Word* +newword(char *s) +{ + Word *w; + + w = (Word *)Malloc(sizeof(Word)); + w->s = strdup(s); + w->next = 0; + return(w); +} + +Word * +stow(char *s) +{ + Word *head, *w, *new; + + w = head = 0; + while(*s){ + new = nextword(&s); + if(new == 0) + break; + if (w) + w->next = new; + else + head = w = new; + while(w->next) + w = w->next; + + 
} + if (!head) + head = newword(""); + return(head); +} + +char * +wtos(Word *w, int sep) +{ + Bufblock *buf; + char *cp; + + buf = newbuf(); + for(; w; w = w->next){ + for(cp = w->s; *cp; cp++) + insert(buf, *cp); + if(w->next) + insert(buf, sep); + } + insert(buf, 0); + cp = strdup(buf->start); + freebuf(buf); + return(cp); +} + +Word* +wdup(Word *w) +{ + Word *v, *new, *base; + + v = base = 0; + while(w){ + new = newword(w->s); + if(v) + v->next = new; + else + base = new; + v = new; + w = w->next; + } + return base; +} + +void +delword(Word *w) +{ + Word *v; + + while(v = w){ + w = w->next; + if(v->s) + free(v->s); + free(v); + } +} + +/* + * break out a word from a string handling quotes, executions, + * and variable expansions. + */ +static Word* +nextword(char **s) +{ + Bufblock *b; + Word *head, *tail, *w; + Rune r; + char *cp; + int empty; + + cp = *s; + b = newbuf(); +restart: + head = tail = 0; + while(*cp == ' ' || *cp == '\t') /* leading white space */ + cp++; + empty = 1; + while(*cp){ + cp += chartorune(&r, cp); + switch(r) + { + case ' ': + case '\t': + case '\n': + goto out; + case '\\': + case '\'': + case '"': + empty = 0; + cp = shellt->expandquote(cp, r, b); + if(cp == 0){ + fprint(2, "missing closing quote: %s\n", *s); + Exit(); + } + break; + case '$': + w = varsub(&cp); + if(w == 0){ + if(empty) + goto restart; + break; + } + empty = 0; + if(b->current != b->start){ + bufcpy(b, w->s, strlen(w->s)); + insert(b, 0); + free(w->s); + w->s = strdup(b->start); + b->current = b->start; + } + if(head){ + bufcpy(b, tail->s, strlen(tail->s)); + bufcpy(b, w->s, strlen(w->s)); + insert(b, 0); + free(tail->s); + tail->s = strdup(b->start); + tail->next = w->next; + free(w->s); + free(w); + b->current = b->start; + } else + tail = head = w; + while(tail->next) + tail = tail->next; + break; + default: + empty = 0; + rinsert(b, r); + break; + } + } +out: + *s = cp; + if(b->current != b->start){ + if(head){ + cp = b->current; + bufcpy(b, tail->s, 
strlen(tail->s)); + bufcpy(b, b->start, cp-b->start); + insert(b, 0); + free(tail->s); + tail->s = strdup(cp); + } else { + insert(b, 0); + head = newword(b->start); + } + } + freebuf(b); + return head; +} + +void +dumpw(char *s, Word *w) +{ + Bprint(&bout, "%s", s); + for(; w; w = w->next) + Bprint(&bout, " '%s'", w->s); + Bputc(&bout, '\n'); +} diff --git a/mk/mkfile b/mk/mkfile @@ -0,0 +1,4 @@ +TARG = libbio libutf libregexp libfmt mk + +<$mkbuild/mk.parent + diff --git a/mkfile b/mkfile @@ -0,0 +1,5 @@ +TARG = _install find sed ed grep expr od stty nawk \ + patch diff printf dc dd fmt hd bc ps pgrep tar cp mk \ + libcommon libuxre lex yacc + +<$mkbuild/mk.parent diff --git a/nawk/COPYING b/nawk/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/nawk/NOTES b/nawk/NOTES @@ -0,0 +1,20 @@ +Notes for the 'nawk' utility +============================ + +Changes since the version published by Caldera in OS Utilities 0.1a +(<http://unixtools.sourceforge.net/>) include: + +- The lex part of the code can be built with both Unix lex and flex. +- Support for multibyte characters. +- Proper support for LC_TIME (locale-specific radix character recognized + in input data, but not in scripts). +- No line length limitations on input and output data. +- No limit on the number of fields per record. + +4.4BSD old awk and Brian W. Kernighan's 'One True awk' (available at +<http://cm.bell-labs.com/cm/cs/who/bwk/index.html>) have been used as +reference for some of the changes; in addition, the manual page +enclosed here has been derived from 'One True awk' since Caldera +did not enclose one. + + Gunnar Ritter 7/30/05 diff --git a/nawk/awk.g.y b/nawk/awk.g.y @@ -0,0 +1,468 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)awk.g.y 1.9 (gritter) 5/14/06> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from RCS Header: awk.g.y 1.2 91/06/25 */ + +%{ +#include "awk.h" +#include <unistd.h> +#include <inttypes.h> +#include <pfmt.h> +int yywrap(void) { return(1); } +#ifndef DEBUG +# define PUTS(x) +#endif +Node *beginloc = 0, *endloc = 0; +int infunc = 0; /* = 1 if in arglist or body of func */ +unsigned char *curfname = 0; +Node *arglist = 0; /* list of args for current function */ +static void setfname(Cell *); +static int constnode(Node *); +static unsigned char *strnode(Node *); +static Node *notnull(Node *); +extern const char illstat[]; + +extern int yylex(void); +%} + +%union { + Node *p; + Cell *cp; + intptr_t i; + unsigned char *s; +} + +%token <i> FIRSTTOKEN /* must be first */ +%token <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND +%token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']' +%token <i> ARRAY +%token <i> MATCH NOTMATCH MATCHOP +%token <i> FINAL DOT ALL CCL NCCL CHAR MCHAR OR STAR QUEST PLUS +%token <i> AND BOR APPEND EQ GE GT LE LT NE IN +%token <i> ARG BLTIN BREAK CONTINUE DELETE DO EXIT FOR FUNC +%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT +%token <i> ADD MINUS MULT DIVIDE MOD +%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ +%token <i> PRINT PRINTF SPRINTF +%token <p> ELSE INTEST CONDEXPR +%token <i> POSTINCR PREINCR POSTDECR PREDECR +%token <cp> VAR IVAR VARNF CALL NUMBER STRING FIELD +%token <s> REGEXPR + +%type <p> pas pattern ppattern plist pplist patlist prarg term re +%type <p> pa_pat pa_stat pa_stats +%type <s> reg_expr +%type <p> simple_stmt opt_simple_stmt stmt stmtlist +%type <p> var varname funcname varlist +%type <p> for if while +%type <i> pst opt_pst lbrace rparen comma nl opt_nl and bor +%type <i> subop print + +%right ASGNOP 
+%right '?' +%right ':' +%left BOR +%left AND +%left GETLINE +%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|' +%left ARG BLTIN BREAK CALL CONTINUE DELETE DO EXIT FOR FIELD FUNC +%left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER +%left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR +%left REGEXPR VAR VARNF IVAR WHILE '(' +%left CAT +%left '+' '-' +%left '*' '/' '%' +%left NOT UMINUS +%right POWER +%right DECR INCR +%left INDIRECT +%token LASTTOKEN /* must be last */ + +%% + +program: + pas { if (errorflag==0) + winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); } + | error { yyclearin; bracecheck(); vyyerror(":95:Bailing out"); } + ; + +and: + AND | and NL + ; + +bor: + BOR | bor NL + ; + +comma: + ',' | comma NL + ; + +do: + DO { } | do NL + ; + +else: + ELSE { } | else NL + ; + +for: + FOR '(' opt_simple_stmt ';' pattern ';' opt_simple_stmt rparen stmt + { $$ = stat4(FOR, $3, notnull($5), $7, $9); } + | FOR '(' opt_simple_stmt ';' ';' opt_simple_stmt rparen stmt + { $$ = stat4(FOR, $3, NIL, $6, $8); } + | FOR '(' varname IN varname rparen stmt + { $$ = stat3(IN, $3, makearr($5), $7); } + ; + +funcname: + VAR { setfname($1); } + | CALL { setfname($1); } + ; + +if: + IF '(' pattern rparen { $$ = notnull($3); } + ; + +lbrace: + '{' | lbrace NL + ; + +nl: + NL | nl NL + ; + +opt_nl: + /* empty */ { $$ = 0; } + | nl + ; + +opt_pst: + /* empty */ { $$ = 0; } + | pst + ; + + +opt_simple_stmt: + /* empty */ { $$ = 0; } + | simple_stmt + ; + +pas: + opt_pst { $$ = 0; } + | opt_pst pa_stats opt_pst { $$ = $2; } + ; + +pa_pat: + pattern { $$ = notnull($1); } + ; + +pa_stat: + pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); } + | pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); } + | pa_pat ',' pa_pat { $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); } + | pa_pat ',' pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $3, $5); } + | lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); } + | XBEGIN lbrace stmtlist '}' + { beginloc = 
linkum(beginloc, $3); $$ = 0; } + | XEND lbrace stmtlist '}' + { endloc = linkum(endloc, $3); $$ = 0; } + | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}' + { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; } + ; + +pa_stats: + pa_stat + | pa_stats opt_pst pa_stat { $$ = linkum($1, $3); } + ; + +patlist: + pattern + | patlist comma pattern { $$ = linkum($1, $3); } + ; + +ppattern: + var ASGNOP ppattern { $$ = op2($2, $1, $3); } + | ppattern '?' ppattern ':' ppattern %prec '?' + { $$ = op3(CONDEXPR, notnull($1), $3, $5); } + | ppattern bor ppattern %prec BOR + { $$ = op2(BOR, notnull($1), notnull($3)); } + | ppattern and ppattern %prec AND + { $$ = op2(AND, notnull($1), notnull($3)); } + | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); } + | ppattern MATCHOP ppattern + { if (constnode($3)) + $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0)); + else + $$ = op3($2, (Node *)1, $1, $3); } + | ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); } + | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); } + | ppattern term %prec CAT { $$ = op2(CAT, $1, $2); } + | term + ; + +pattern: + var ASGNOP pattern { $$ = op2($2, $1, $3); } + | pattern '?' pattern ':' pattern %prec '?' 
+ { $$ = op3(CONDEXPR, notnull($1), $3, $5); } + | pattern bor pattern %prec BOR + { $$ = op2(BOR, notnull($1), notnull($3)); } + | pattern and pattern %prec AND + { $$ = op2(AND, notnull($1), notnull($3)); } + | NOT pattern + { $$ = op1(NOT, op2(NE,$2,valtonode(lookup("$zero&null",symtab),CCON))); } + | pattern EQ pattern { $$ = op2($2, $1, $3); } + | pattern GE pattern { $$ = op2($2, $1, $3); } + | pattern GT pattern { $$ = op2($2, $1, $3); } + | pattern LE pattern { $$ = op2($2, $1, $3); } + | pattern LT pattern { $$ = op2($2, $1, $3); } + | pattern NE pattern { $$ = op2($2, $1, $3); } + | pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); } + | pattern MATCHOP pattern + { if (constnode($3)) + $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0)); + else + $$ = op3($2, (Node *)1, $1, $3); } + | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); } + | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); } + | pattern '|' GETLINE var { $$ = op3(GETLINE, $4, (Node*)$2, $1); } + | pattern '|' GETLINE { $$ = op3(GETLINE, (Node*)0, (Node*)$2, $1); } + | pattern term %prec CAT { $$ = op2(CAT, $1, $2); } + | term + ; + +plist: + pattern comma pattern { $$ = linkum($1, $3); } + | plist comma pattern { $$ = linkum($1, $3); } + ; + +pplist: + ppattern + | pplist comma ppattern { $$ = linkum($1, $3); } + ; + +prarg: + /* empty */ { $$ = rectonode(); } + | pplist + | '(' plist ')' { $$ = $2; } + ; + +print: + PRINT | PRINTF + ; + +pst: + NL | ';' | pst NL | pst ';' + ; + +rbrace: + '}' { } | rbrace NL + ; + +re: + reg_expr + { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1,0)); } + | NOT re {$$ = op1(NOT, notnull($2)); } + ; + +reg_expr: + '/' {startreg();} REGEXPR '/' { $$ = $3; } + ; + +rparen: + ')' | rparen NL + ; + +simple_stmt: + print prarg '|' term { $$ = stat3($1, $2, (Node *) $3, $4); } + | print prarg APPEND term { $$ = stat3($1, $2, (Node *) $3, $4); } + | print prarg GT term { $$ = stat3($1, $2, (Node *) $3, $4); } + | 
print prarg { $$ = stat3($1, $2, NIL, NIL); } + | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); } + | DELETE varname { yyclearin; vyyerror(":96:You can only delete array[element]"); $$ = stat1(DELETE, $2); } + | pattern { $$ = exptostat($1); } + | error { yyclearin; vyyerror(illstat); } + ; + +st: + nl { } | ';' opt_nl { } + ; + +stmt: + BREAK st { $$ = stat1(BREAK, NIL); } + | CONTINUE st { $$ = stat1(CONTINUE, NIL); } + | do stmt WHILE '(' pattern ')' st + { $$ = stat2(DO, $2, notnull($5)); } + | EXIT pattern st { $$ = stat1(EXIT, $2); } + | EXIT st { $$ = stat1(EXIT, NIL); } + | for + | if stmt else stmt { $$ = stat3(IF, $1, $2, $4); } + | if stmt { $$ = stat3(IF, $1, $2, NIL); } + | lbrace stmtlist rbrace { $$ = $2; } + | NEXT st { if (infunc) + vyyerror(":97:Next is illegal inside a function"); + $$ = stat1(NEXT, NIL); } + | RETURN pattern st { $$ = stat1(RETURN, $2); } + | RETURN st { $$ = stat1(RETURN, NIL); } + | simple_stmt st + | while stmt { $$ = stat2(WHILE, $1, $2); } + | ';' opt_nl { $$ = 0; } + ; + +stmtlist: + stmt + | stmtlist stmt { $$ = linkum($1, $2); } + ; + +subop: + SUB | GSUB + ; + +term: + term '+' term { $$ = op2(ADD, $1, $3); } + | term '-' term { $$ = op2(MINUS, $1, $3); } + | term '*' term { $$ = op2(MULT, $1, $3); } + | term '/' term { $$ = op2(DIVIDE, $1, $3); } + | term '%' term { $$ = op2(MOD, $1, $3); } + | term POWER term { $$ = op2(POWER, $1, $3); } + | '-' term %prec UMINUS { $$ = op1(UMINUS, $2); } + | '+' term %prec UMINUS { $$ = $2; } + | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); } + | BLTIN '(' ')' { $$ = op2(BLTIN, (Node *) $1, rectonode()); } + | BLTIN '(' patlist ')' { $$ = op2(BLTIN, (Node *) $1, $3); } + | BLTIN { $$ = op2(BLTIN, (Node *) $1, rectonode()); } + | CALL '(' ')' { $$ = op2(CALL, valtonode($1,CVAR), NIL); } + | CALL '(' patlist ')' { $$ = op2(CALL, valtonode($1,CVAR), $3); } + | DECR var { $$ = op1(PREDECR, $2); } + | INCR var { $$ = op1(PREINCR, $2); } + | var DECR { $$ = 
op1(POSTDECR, $1); } + | var INCR { $$ = op1(POSTINCR, $1); } + | GETLINE var LT term { $$ = op3(GETLINE, $2, (Node *)$3, $4); } + | GETLINE LT term { $$ = op3(GETLINE, NIL, (Node *)$2, $3); } + | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); } + | GETLINE { $$ = op3(GETLINE, NIL, NIL, NIL); } + | INDEX '(' pattern comma pattern ')' + { $$ = op2(INDEX, $3, $5); } + | INDEX '(' pattern comma reg_expr ')' + { vyyerror(":98:Index() doesn't permit regular expressions"); + $$ = op2(INDEX, $3, (Node*)$5); } + | '(' pattern ')' { $$ = $2; } + | MATCHFCN '(' pattern comma reg_expr ')' + { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); } + | MATCHFCN '(' pattern comma pattern ')' + { if (constnode($5)) + $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1)); + else + $$ = op3(MATCHFCN, (Node *)1, $3, $5); } + | NUMBER { $$ = valtonode($1, CCON); } + | SPLIT '(' pattern comma varname comma pattern ')' /* string */ + { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); } + | SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */ + { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); } + | SPLIT '(' pattern comma varname ')' + { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */ + | SPRINTF '(' patlist ')' { $$ = op1($1, $3); } + | STRING { $$ = valtonode($1, CCON); } + | subop '(' reg_expr comma pattern ')' + { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); } + | subop '(' pattern comma pattern ')' + { if (constnode($3)) + $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode()); + else + $$ = op4($1, (Node *)1, $3, $5, rectonode()); } + | subop '(' reg_expr comma pattern comma var ')' + { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); } + | subop '(' pattern comma pattern comma var ')' + { if (constnode($3)) + $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7); + else + $$ = op4($1, (Node *)1, $3, $5, $7); } + | SUBSTR '(' pattern comma pattern comma pattern ')' + { $$ = 
op3(SUBSTR, $3, $5, $7); } + | SUBSTR '(' pattern comma pattern ')' + { $$ = op3(SUBSTR, $3, $5, NIL); } + | var + | re + ; + +var: + varname + | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); } + | FIELD { $$ = valtonode($1, CFLD); } + | IVAR { $$ = op1(INDIRECT, valtonode($1, CVAR)); } + | INDIRECT term { $$ = op1(INDIRECT, $2); } + ; + +varlist: + /* nothing */ { arglist = $$ = 0; } + | VAR { arglist = $$ = valtonode($1,CVAR); } + | varlist comma VAR { arglist = $$ = linkum($1,valtonode($3,CVAR)); } + ; + +varname: + VAR { $$ = valtonode($1, CVAR); } + | ARG { $$ = op1(ARG, (Node *) $1); } + | VARNF { $$ = op1(VARNF, (Node *) $1); } + ; + + +while: + WHILE '(' pattern rparen { $$ = notnull($3); } + ; + +%% +/* setfname: called from the funcname rule; reports an error through vyyerror() if p is already an array or already a function, then records p->nval in curfname in every case (the error path does not return early). */ +static void +setfname(Cell *p) +{ + if (isarr(p)) + vyyerror(":99:%s is an array, not a function", p->nval); + else if (isfunc(p)) + vyyerror(":100:You cannot define function %s more than once", p->nval); + curfname = p->nval; +} +/* constnode: nonzero when p is an NVALUE node whose narg[0], viewed as a Cell, has csub == CCON. The grammar calls this before strnode() to decide whether a match operand can be handed to makedfa() at parse time. */ +static int +constnode(Node *p) +{ + return p->ntype == NVALUE && ((Cell *) (p->narg[0]))->csub == CCON; +} +/* strnode: returns the sval of the Cell stored in p->narg[0]. It performs no type check itself; every use in the grammar above is guarded by constnode(). */ +static unsigned char *strnode(Node *p) +{ + return ((Cell *)(p->narg[0]))->sval; +} +/* notnull: returns n unchanged when n->nobj is one of the relational operators (LE, LT, EQ, NE, GT, GE) or boolean operators (BOR, AND, NOT); any other node is wrapped as op2(NE, n, nullnode). n is dereferenced without a NULL check. */ +static Node *notnull(Node *n) +{ + switch (n->nobj) { + case LE: case LT: case EQ: case NE: case GT: case GE: + case BOR: case AND: case NOT: + return n; + default: + return op2(NE, n, nullnode); + } +} diff --git a/nawk/awk.h b/nawk/awk.h @@ -0,0 +1,387 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)awk.h 1.23 (gritter) 12/25/04> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/awk.h /main/uw7_nj/1 */ +/* from RCS Header: awk.h 1.2 91/06/25 */ + +typedef double Awkfloat; + +#define xfree(a) { if ((a) != NULL) { free(a); a = NULL; } } +#define MAXLABEL 25 + +extern const char version[]; + +extern char errbuf[200]; +#define ERROR snprintf(errbuf, sizeof errbuf, +#define FATAL ), error(1, errbuf) +#define WARNING ), error(0, errbuf) +#define SYNTAX ), yyerror(errbuf) + +extern int compile_time; /* 1 if compiling, 0 if running */ + +extern int posix; /* if POSIX behavior is desired */ + +/* + * This is done to prevent redefinition of our own definitions for FS with + * those defined in the system's header files. Same of RS (on HP-UX/PA-RISC). 
+ */ +#undef FS +#undef RS + +extern unsigned char **FS; +extern unsigned char **RS; +extern unsigned char **ORS; +extern unsigned char **OFS; +extern unsigned char **OFMT; +extern unsigned char **CONVFMT; +extern Awkfloat *NR; +extern Awkfloat *FNR; +extern Awkfloat *NF; +extern unsigned char **FILENAME; +extern unsigned char **SUBSEP; +extern Awkfloat *RSTART; +extern Awkfloat *RLENGTH; + +#define CHUNK 512 /* record and string increment */ + +extern unsigned char *record; +extern int recsize; +extern int dbg; +extern int lineno; +extern int errorflag; +extern int donefld; /* 1 if record broken into fields */ +extern int donerec; /* 1 if record is valid (no fld has changed */ + +#define CBUFLEN 5120 +extern unsigned char cbuf[CBUFLEN]; /* miscellaneous character collection */ + +extern unsigned char *patbeg; /* beginning of pattern matched */ +extern int patlen; /* length. set in b.c */ + +extern int mb_cur_max; /* MB_CUR_MAX, for acceleration purposes */ + +extern const char outofspace[]; /* message */ + +/* Cell: all information about a variable or constant */ + +typedef struct Cell { + unsigned char ctype; /* OCELL, OBOOL, OJUMP, etc. */ + unsigned char csub; /* CCON, CTEMP, CFLD, etc. 
*/ + unsigned char *nval; /* name, for variables only */ + unsigned char *sval; /* string value */ + Awkfloat fval; /* value as number */ + unsigned tval; /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE */ + struct Cell *cnext; /* ptr to next if chained */ +} Cell; + +typedef struct { /* symbol table array */ + int nelem; /* elements in table right now */ + int size; /* size of tab */ + Cell **tab; /* hash table pointers */ +} Array; + +#define NSYMTAB 50 /* initial size of a symbol table */ +extern Array *symtab, *makesymtab(int); +#define setsymtab(n, s, f, t, tp) ssetsymtab((unsigned char *)n, \ + (unsigned char *)s, \ + f, t, tp) +extern Cell *ssetsymtab(unsigned char *, unsigned char *, Awkfloat, + unsigned, Array *); +#define lookup(s, tp) slookup((unsigned char *)s, tp) +extern Cell *slookup(unsigned char *, Array *); + +extern Cell *recloc; /* location of input record */ +extern Cell *nrloc; /* NR */ +extern Cell *fnrloc; /* FNR */ +extern Cell *fsloc; /* FS */ +extern Cell *nfloc; /* NF */ +extern Cell *rstartloc; /* RSTART */ +extern Cell *rlengthloc; /* RLENGTH */ + +/* Cell.tval values: */ +#define NUM 01 /* number value is valid */ +#define STR 02 /* string value is valid */ +#define DONTFREE 04 /* string space is not freeable */ +#define CON 010 /* this is a constant */ +#define ARR 020 /* this is an array */ +#define FCN 040 /* this is a function name */ +#define FLD 0100 /* this is a field $1, $2, ... 
*/ +#define REC 0200 /* this is $0 */ +#define CANBENUM 0400 /* tells setsymtab() to try for NUM, too */ + +#define freeable(p) (!((p)->tval & DONTFREE)) + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#ifdef __GLIBC__ +#ifdef _IO_getc_unlocked +#undef getc +#define getc(c) _IO_getc_unlocked(c) +#endif /* _IO_getc_unlocked */ +#endif /* __GLIBC__ */ + +#define getline xxgetline /* avoid glibc _GNU_SOURCE collision */ + +#define DEBUG +#ifdef DEBUG + /* uses have to be doubly parenthesized */ +# define dprintf(x) if (dbg) printf x +#else +# define dprintf(x) +#endif + +#ifndef IN_MAKETAB +#include <wchar.h> + +/* + * Get next character from string s and store it in wc; n is set to + * the length of the corresponding byte sequence. + */ +#define next(wc, s, n) (mb_cur_max > 1 && *(s) & 0200 ? \ + ((n) = mbtowc(&(wc), (char *)(s), mb_cur_max), \ + (n) = ((n) > 0 ? (n) : (n) < 0 ? (wc=WEOF, 1) : 1)) :\ + ((wc) = *(s), (n) = 1)) +#endif /* !IN_MAKETAB */ + +/* function types */ +#define FLENGTH 1 +#define FSQRT 2 +#define FEXP 3 +#define FLOG 4 +#define FINT 5 +#define FSYSTEM 6 +#define FRAND 7 +#define FSRAND 8 +#define FSIN 9 +#define FCOS 10 +#define FATAN 11 +#define FTOUPPER 12 +#define FTOLOWER 13 +#define FCLOSE 14 + +/* Node: parse tree is made of nodes, with Cell's at bottom */ + +typedef struct Node { + int ntype; + struct Node *nnext; + int lineno; + int nobj; + struct Node *narg[1]; /* variable: actual size set by calling malloc */ +} Node; + +#define NIL ((Node *) 0) + +extern Node *winner; +extern Node *nullstat; +extern Node *nullnode; + +/* ctypes */ +#define OCELL 1 +#define OBOOL 2 +#define OJUMP 3 + +/* Cell subtypes: csub */ +#define CFREE 7 +#define CCOPY 6 +#define CCON 5 +#define CTEMP 4 +#define CNAME 3 +#define CVAR 2 +#define CFLD 1 + +/* bool subtypes */ +#define BTRUE 11 +#define BFALSE 12 + +/* jump subtypes */ +#define JEXIT 21 +#define JNEXT 22 +#define JBREAK 23 +#define JCONT 24 +#define JRET 25 + +/* node types */ 
+#define NVALUE 1 +#define NSTAT 2 +#define NEXPR 3 +#define NFIELD 4 + +extern Cell *(*proctab[])(Node **, int); +extern int pairstack[]; +extern long paircnt; + +#define notlegal(n) (n <= FIRSTTOKEN || n >= LASTTOKEN || proctab[n-FIRSTTOKEN] == nullproc) +#define isvalue(n) ((n)->ntype == NVALUE) +#define isexpr(n) ((n)->ntype == NEXPR) +#define isjump(n) ((n)->ctype == OJUMP) +#define isexit(n) ((n)->csub == JEXIT) +#define isbreak(n) ((n)->csub == JBREAK) +#define iscont(n) ((n)->csub == JCONT) +#define isnext(n) ((n)->csub == JNEXT) +#define isret(n) ((n)->csub == JRET) +#define isstr(n) ((n)->tval & STR) +#define isnum(n) ((n)->tval & NUM) +#define isarr(n) ((n)->tval & ARR) +#define isfunc(n) ((n)->tval & FCN) +#define istrue(n) ((n)->csub == BTRUE) +#define istemp(n) ((n)->csub == CTEMP) + +#include <regex.h> + +typedef struct fa { + unsigned char *restr; + int use; + int notbol; + regex_t re; +} fa; + +/* awk.g.c */ +extern int yywrap(void); +extern int yyparse(void); +/* awk.lx.c */ +extern int yylex(void); +extern void startreg(void); +extern int awk_input(void); +/* b.c */ +extern fa *makedfa(unsigned char *, int); +extern int match(void *, unsigned char *); +extern int pmatch(void *, unsigned char *); +extern int nematch(void *, unsigned char *); +/* lib.c */ +extern void fldinit(void); +extern void initgetrec(void); +extern int getrec(unsigned char **, int *); +extern int readrec(unsigned char **, int *, FILE *); +extern unsigned char *getargv(int); +extern void setclvar(unsigned char *); +extern void fldbld(void); +extern void newfld(int); +extern void recbld(void); +extern Cell *fieldadr(int); +extern void vyyerror(const char *, ...); +extern void yyerror(char *); +extern void fpecatch(int); +extern void bracecheck(void); +extern void error(int, const char *, ...); +extern void bclass(int); +extern double errcheck(double, unsigned char *); +extern void PUTS(unsigned char *); +extern int isclvar(unsigned char *); +extern int is2number(unsigned char 
*, Cell *); +extern double awk_atof(const char *); +extern unsigned char *makerec(const unsigned char *, int); +/* main.c */ +extern int pgetc(void); +/* parse.c */ +extern Node *nodealloc(int); +extern Node *exptostat(Node *); +extern Node *node1(int, Node *); +extern Node *node2(int, Node *, Node *); +extern Node *node3(int, Node *, Node *, Node *); +extern Node *node4(int, Node *, Node *, Node *, Node *); +extern Node *stat3(int, Node *, Node *, Node *); +extern Node *op2(int, Node *, Node *); +extern Node *op1(int, Node *); +extern Node *stat1(int, Node *); +extern Node *op3(int, Node *, Node *, Node *); +extern Node *op4(int, Node *, Node *, Node *, Node *); +extern Node *stat2(int, Node *, Node *); +extern Node *stat4(int, Node *, Node *, Node *, Node *); +extern Node *valtonode(Cell *, int); +extern Node *rectonode(void); +extern Node *makearr(Node *); +extern Node *pa2stat(Node *, Node *, Node *); +extern Node *linkum(Node *, Node *); +extern void defn(Cell *, Node *, Node *); +extern int isarg(const char *); +/* proctab.c */ +extern unsigned char *tokname(int); +/* run.c */ +extern int run(Node *); +extern Cell *r_execute(Node *); +extern Cell *program(Node **, int); +extern Cell *call(Node **, int); +extern Cell *copycell(Cell *); +extern Cell *arg(Node **, int); +extern Cell *jump(Node **, int); +extern Cell *getline(Node **, int); +extern Cell *getnf(Node **, int); +extern Cell *array(Node **, int); +extern Cell *delete(Node **, int); +extern Cell *intest(Node **, int); +extern Cell *matchop(Node **, int); +extern Cell *boolop(Node **, int); +extern Cell *relop(Node **, int); +extern Cell *gettemp(const char *); +extern Cell *indirect(Node **, int); +extern Cell *substr(Node **, int); +extern Cell *sindex(Node **, int); +extern int format(unsigned char **, int *, const unsigned char *, Node *); +extern Cell *awsprintf(Node **, int); +extern Cell *aprintf(Node **, int); +extern Cell *arith(Node **, int); +extern double ipow(double, int); +extern Cell 
*incrdecr(Node **, int); +extern Cell *assign(Node **, int); +extern Cell *cat(Node **, int); +extern Cell *pastat(Node **, int); +extern Cell *dopa2(Node **, int); +extern Cell *split(Node **, int); +extern Cell *condexpr(Node **, int); +extern Cell *ifstat(Node **, int); +extern Cell *whilestat(Node **, int); +extern Cell *dostat(Node **, int); +extern Cell *forstat(Node **, int); +extern Cell *instat(Node **, int); +extern Cell *bltin(Node **, int); +extern Cell *print(Node **, int); +extern Cell *nullproc(Node **, int); +extern FILE *redirect(int, Node *); +extern FILE *openfile(int, unsigned char *); +extern Cell *sub(Node **, int); +extern Cell *gsub(Node **, int); +extern int chrlen(const unsigned char *); +extern int chrdist(const unsigned char *, const unsigned char *); +/* tran.c */ +extern void syminit(void); +extern void arginit(int, unsigned char **); +extern void envinit(unsigned char **); +extern Array *makesymtab(int); +extern void freesymtab(Cell *); +extern void freeelem(Cell *, unsigned char *); +extern Cell *ssetsymtab(unsigned char *, unsigned char *, + Awkfloat, unsigned, Array *); +extern Cell *slookup(unsigned char *, Array *); +extern Awkfloat setfval(Cell *, Awkfloat); +extern void funnyvar(Cell *, char *); +extern unsigned char *setsval(Cell *, unsigned char *); +extern Awkfloat r_getfval(Cell *); +extern unsigned char *r_getsval(Cell *); +#define tostring(s) stostring((unsigned char *)s) +extern unsigned char *stostring(const unsigned char *); +extern unsigned char *qstring(unsigned char *, int); diff --git a/nawk/awk.lx.l b/nawk/awk.lx.l @@ -0,0 +1,383 @@ +%{ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)awk.lx.l 1.13 (gritter) 11/22/05> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * flex port partially taken from 4.4BSD awk, + * + * Copyright (c) 1991 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/awk.lx.l /main/uw7_nj/1 */ +/* from RCS Header: awk.lx.l 1.2 91/06/25 */ + +/*%Start A str sc reg comment*/ +/*%X A str sc reg comment*/ +%} +%X A str reg + +%{ + +#include "awk.h" +#include "y.tab.h" +#include <pfmt.h> +#include <unistd.h> + +static void awk_unputstr(const char *s); + +#ifdef FLEX_SCANNER +static int awk_yytchar; +int awk_input(void); +static void awk_unput(int c); +#undef YY_INPUT +#define YY_INPUT(buf, result, max_size) { \ + int c = awk_input(); \ + result = (c == EOF || c == '\0') ? YY_NULL : (buf[0] = c, 1); \ +} +#else /* !FLEX_SCANNER */ +#undef input /* defeat lex */ +#undef unput +int input(void); +void unput(int c); +#define awk_unput(c) unput(c) +#define awk_yytchar yytchar +#endif /* !FLEX_SCANNER */ + + +extern YYSTYPE yylval; +extern int infunc; + +int lineno = 1; +int bracecnt = 0; +int brackcnt = 0; +int parencnt = 0; +#define DEBUG +#ifdef DEBUG +# define RET(x) {if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); } +#else +# define RET(x) return(x) +#endif + +#define CADD cbuf[clen++] = yytext[0]; \ + if (clen >= CBUFLEN-1) { \ + vyyerror(":90:String/reg expr %.10s ... 
too long", cbuf); \ + BEGIN INITIAL; \ + } + +static const char extra[] = ":91:Extra %c"; +extern const char nlstring[]; + +unsigned char cbuf[CBUFLEN]; +unsigned char *s; +int clen, cflag; +%} + +A [a-zA-Z_] +B [a-zA-Z0-9_] +D [0-9] +O [0-7] +H [0-9a-fA-F] +WS [ \t] + +%% + static int sc_flag = 0; + + if (sc_flag) { + BEGIN INITIAL; + sc_flag = 0; + RET('}'); + } + +\n { lineno++; RET(NL); } +#.* { ; } /* strip comments */ +{WS}+ { ; } +<INITIAL,reg>"\\"\n lineno++; +; { RET(';'); } + +BEGIN { RET(XBEGIN); } +END { RET(XEND); } +func(tion)? { if (infunc) vyyerror(":92:Illegal nested function"); RET(FUNC); } +return { if (!infunc) vyyerror(":93:Return not in function"); RET(RETURN); } +"&&" { RET(AND); } +"||" { RET(BOR); } +"!" { RET(NOT); } +"!=" { yylval.i = NE; RET(NE); } +"~" { yylval.i = MATCH; RET(MATCHOP); } +"!~" { yylval.i = NOTMATCH; RET(MATCHOP); } +"<" { yylval.i = LT; RET(LT); } +"<=" { yylval.i = LE; RET(LE); } +"==" { yylval.i = EQ; RET(EQ); } +">=" { yylval.i = GE; RET(GE); } +">" { yylval.i = GT; RET(GT); } +">>" { yylval.i = APPEND; RET(APPEND); } +"++" { yylval.i = INCR; RET(INCR); } +"--" { yylval.i = DECR; RET(DECR); } +"+=" { yylval.i = ADDEQ; RET(ASGNOP); } +"-=" { yylval.i = SUBEQ; RET(ASGNOP); } +"*=" { yylval.i = MULTEQ; RET(ASGNOP); } +"/=" { yylval.i = DIVEQ; RET(ASGNOP); } +"%=" { yylval.i = MODEQ; RET(ASGNOP); } +"^=" { yylval.i = POWEQ; RET(ASGNOP); } +"**=" { yylval.i = POWEQ; RET(ASGNOP); } +"=" { yylval.i = ASSIGN; RET(ASGNOP); } +"**" { RET(POWER); } +"^" { RET(POWER); } + +"$"{D}+ { yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); } +"$NF" { awk_unputstr("(NF)"); return(INDIRECT); } +"$"{A}{B}* { int c, n; + c = awk_yytchar; + if (c == '(' || c == '[' || infunc && (n=isarg(yytext+1)) >= 0) { + awk_unputstr(yytext+1); + return(INDIRECT); + } else { + yylval.cp = setsymtab((unsigned char *)yytext+1,"",0.0,STR|NUM,symtab); + RET(IVAR); + } + } +"$" { RET(INDIRECT); } +NF { yylval.cp = setsymtab((unsigned char *)yytext, "", 0.0, 
NUM, symtab); RET(VARNF); } + +({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)? { + yylval.cp = setsymtab((unsigned char *)yytext, tostring((unsigned char *)yytext), awk_atof(yytext), CON|NUM, symtab); + RET(NUMBER); } + +while { RET(WHILE); } +for { RET(FOR); } +do { RET(DO); } +if { RET(IF); } +else { RET(ELSE); } +next { RET(NEXT); } +exit { RET(EXIT); } +break { RET(BREAK); } +continue { RET(CONTINUE); } +print { yylval.i = PRINT; RET(PRINT); } +printf { yylval.i = PRINTF; RET(PRINTF); } +sprintf { yylval.i = SPRINTF; RET(SPRINTF); } +split { yylval.i = SPLIT; RET(SPLIT); } +substr { RET(SUBSTR); } +sub { yylval.i = SUB; RET(SUB); } +gsub { yylval.i = GSUB; RET(GSUB); } +index { RET(INDEX); } +match { RET(MATCHFCN); } +in { RET(IN); } +getline { RET(GETLINE); } +delete { RET(DELETE); } +length { yylval.i = FLENGTH; RET(BLTIN); } +log { yylval.i = FLOG; RET(BLTIN); } +int { yylval.i = FINT; RET(BLTIN); } +exp { yylval.i = FEXP; RET(BLTIN); } +sqrt { yylval.i = FSQRT; RET(BLTIN); } +sin { yylval.i = FSIN; RET(BLTIN); } +cos { yylval.i = FCOS; RET(BLTIN); } +atan2 { yylval.i = FATAN; RET(BLTIN); } +system { yylval.i = FSYSTEM; RET(BLTIN); } +rand { yylval.i = FRAND; RET(BLTIN); } +srand { yylval.i = FSRAND; RET(BLTIN); } +toupper { yylval.i = FTOUPPER; RET(BLTIN); } +tolower { yylval.i = FTOLOWER; RET(BLTIN); } +close { yylval.i = FCLOSE; RET(BLTIN); } + +{A}{B}* { int n, c; + c = awk_yytchar; /* look for '(' */ + if (c != '(' && infunc && (n=isarg(yytext)) >= 0) { + yylval.i = n; + RET(ARG); + } else { + yylval.cp = setsymtab((unsigned char *)yytext,"",0.0,STR|NUM,symtab); + if (c == '(') { + RET(CALL); + } else { + RET(VAR); + } + } + } +\" { BEGIN str; clen = 0; } + +"}" { if (--bracecnt < 0) vyyerror(extra, '}'); sc_flag = 1; RET(';'); } +"]" { if (--brackcnt < 0) vyyerror(extra, ']'); RET(']'); } +")" { if (--parencnt < 0) vyyerror(extra, ')'); RET(')'); } + +. 
{ if (yytext[0] == '{') bracecnt++; + else if (yytext[0] == '[') brackcnt++; + else if (yytext[0] == '(') parencnt++; + RET(yylval.i = yytext[0]); /* everything else */ } + +<reg>\\. { cbuf[clen++] = '\\'; cbuf[clen++] = yytext[1]; } +<reg>\n { vyyerror(":94:Newline in regular expression %.10s ...", cbuf); lineno++; BEGIN INITIAL; } +<reg>"/" { BEGIN INITIAL; + cbuf[clen] = 0; + yylval.s = tostring(cbuf); + awk_unput('/'); + RET(REGEXPR); } +<reg>. { CADD; } + +<str>\" { BEGIN INITIAL; + cbuf[clen] = 0; s = tostring(cbuf); + cbuf[clen] = ' '; cbuf[++clen] = 0; + yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab); + RET(STRING); } +<str>\n { vyyerror(nlstring, cbuf); lineno++; BEGIN INITIAL; } +<str>"\\\"" { cbuf[clen++] = '"'; } +<str>"\\"n { cbuf[clen++] = '\n'; } +<str>"\\"t { cbuf[clen++] = '\t'; } +<str>"\\"f { cbuf[clen++] = '\f'; } +<str>"\\"r { cbuf[clen++] = '\r'; } +<str>"\\"b { cbuf[clen++] = '\b'; } +<str>"\\"v { cbuf[clen++] = '\v'; } /* these ANSIisms may not be known by */ +<str>"\\"a { cbuf[clen++] = '\007'; } /* your compiler. hence 007 for bell */ +<str>"\\\\" { cbuf[clen++] = '\\'; } +<str>"\\"({O}{O}{O}|{O}{O}|{O}) { int n; + sscanf(yytext+1, "%o", &n); cbuf[clen++] = n; } +<str>"\\"x({H}+) { int n; /* ANSI permits any number! */ + sscanf(yytext+2, "%x", &n); cbuf[clen++] = n; } +<str>"\\". { cbuf[clen++] = yytext[1]; } +<str>. { CADD; } + +%% + +void +startreg(void) +{ + BEGIN reg; + clen = 0; +} + +/* input() and unput() were transcriptions of the standard lex + macros for input and output with additions for error message + printing. God help us all if someone changes how lex works. + - Luckily, the BSD people did most of the flex porting already + for oawk. +*/ + +unsigned char ebuf[300]; +unsigned char *ep = ebuf; + +#ifdef FLEX_SCANNER +int +awk_input(void) +{ + register int c; + extern unsigned char *lexprog; + + if (lexprog != NULL) { /* awk '...' */ + if (c = *lexprog & 0377) + lexprog++; + } else /* awk -f ... 
*/ + c = pgetc(); + if (c == EOF) + c = 0; + if (ep >= ebuf + sizeof ebuf) + ep = ebuf; + awk_yytchar = c; + return *ep++ = c; +} + +static void +awk_unput(int c) +{ + awk_yytchar = c; + unput(c); +} + + +#else /* !FLEX_SCANNER */ +int +input(void) +{ + register int c; + extern unsigned char *lexprog; + + if (yysptr > yysbuf) + c = U(*--yysptr) & 0377; + else if (lexprog != NULL) { /* awk '...' */ + if (c = *lexprog & 0377) + lexprog++; + } else /* awk -f ... */ + c = pgetc(); + if (c == '\n') + yylineno++; + else if (c == EOF) + c = 0; + if (ep >= ebuf + sizeof ebuf) + ep = ebuf; + return *ep++ = c; +} + +int +awk_input(void) +{ + return input(); +} + +void +unput(int c) +{ + yytchar = c; + if (yytchar == '\n') + yylineno--; + *yysptr++ = yytchar; + if (--ep < ebuf) + ep = ebuf + sizeof(ebuf) - 1; +} +#endif /* !FLEX_SCANNER */ + +static void +awk_unputstr(const char *s) +{ + int i; + + for (i = strlen(s)-1; i >= 0; i--) + awk_unput(s[i]); +} diff --git a/nawk/b.c b/nawk/b.c @@ -0,0 +1,174 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)b.c 1.6 (gritter) 5/15/04> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. 
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/b.c /main/uw7_nj/1 */ + +#include <stdio.h> +#include "awk.h" +#include <ctype.h> +#include "y.tab.h" +#include <pfmt.h> + +unsigned char *patbeg; +int patlen; + +static void +reprob(fa *f, int e) +{ + char msg[BUFSIZ]; + + regerror(e, &f->re, msg, sizeof(msg)); + error(MM_ERROR, ":104:Error in RE `%s': %s", f->restr, msg); +} + +static fa * +mkdfa(unsigned char *s) /* build DFA from s */ +{ + fa *pfa; + int i; + int flags; + + if ((pfa = (fa *)malloc(sizeof(fa))) == 0) + { + error(MM_ERROR, + "5:Regular expression too big: out of space in %s", s); + } + flags = posix ? REG_EXTENDED : REG_OLDERE | REG_OLDESC | REG_NOI18N; + flags |= REG_ONESUB | REG_BKTEMPTY | REG_BKTESCAPE | REG_ESCSEQ; + if ((i = regcomp(&pfa->re, (char *)s, flags)) != 0) + { + pfa->restr = s; + reprob(pfa, i); + } + pfa->restr = tostring(s); + pfa->use = 1; + pfa->notbol = 0; + return pfa; +} + +fa * +makedfa(unsigned char *s, int leftmost) /* build and cache DFA from s */ +{ + static fa *fatab[20]; + static int nfatab; + int i, n, u; + fa *pfa; + + if (compile_time) + return mkdfa(s); + /* + * Search for a match to those cached. + * If not found, save it, tossing least used one when full. + */ + for (i = 0; i < nfatab; i++) + { + if (strcmp((char *)fatab[i]->restr, (char *)s) == 0) + { + fatab[i]->use++; + return fatab[i]; + } + } + pfa = mkdfa(s); + if ((n = nfatab) < sizeof(fatab) / sizeof(fa *)) + nfatab++; + else + { + n = 0; + u = fatab[0]->use; + for (i = 1; i < sizeof(fatab) / sizeof(fa *); i++) + { + if (fatab[i]->use < u) + { + n = i; + u = fatab[n]->use; + } + } + free((void *)fatab[n]->restr); + regfree(&fatab[n]->re); + free((void *)fatab[n]); + } + fatab[n] = pfa; + return pfa; +} + +int +match(void *v, unsigned char *p) /* does p match f anywhere? 
*/ +{ + int err; + fa *f = v; + + if ((err = regexec(&f->re, (char *)p, (size_t)0, (regmatch_t *)0, 0)) == 0) + return 1; + if (err != REG_NOMATCH) + reprob(f, err); + return 0; +} + +int +pmatch(void *v, unsigned char *p) /* find leftmost longest (maybe empty) match */ +{ + regmatch_t m; + int err; + fa *f = v; + + if ((err = regexec(&f->re, (char *)p, (size_t)1, &m, f->notbol)) == 0) + { + patbeg = &p[m.rm_so]; + patlen = m.rm_eo - m.rm_so; + return 1; + } + if (err != REG_NOMATCH) + reprob(f, err); + patlen = -1; + return 0; +} + +int +nematch(void *v, unsigned char *p) /* find leftmost longest nonempty match */ +{ + regmatch_t m; + int err; + fa *f = v; + + for (;;) + { + if ((err = regexec(&f->re, (char *)p, (size_t)1, &m, + f->notbol | REG_NONEMPTY)) == 0) + { + if ((patlen = m.rm_eo - m.rm_so) == 0) + { + p += m.rm_eo; + continue; + } + patbeg = &p[m.rm_so]; + return 1; + } + if (err != REG_NOMATCH) + reprob(f, err); + patlen = -1; + return 0; + } +} diff --git a/nawk/lib.c b/nawk/lib.c @@ -0,0 +1,852 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)lib.c 1.27 (gritter) 12/25/06> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. 
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/lib.c /main/uw7_nj/1 */ +/* from RCS Header: lib.c 1.2 91/06/25 */ + +#define DEBUG +#include <stdio.h> +#include <ctype.h> +#include <errno.h> +#include <string.h> +#include <strings.h> +#include "awk.h" +#include "y.tab.h" +#include <pfmt.h> +#include <stdarg.h> +#include <wctype.h> +#include "asciitype.h" + +#undef RS + +static void eprint(void); + +#define getfval(p) (((p)->tval & (ARR|FLD|REC|NUM)) == NUM ? (p)->fval : r_getfval(p)) +#define getsval(p) (((p)->tval & (ARR|FLD|REC|STR)) == STR ? (p)->sval : r_getsval(p)) + +FILE *infile = NULL; +unsigned char *file = (unsigned char*) ""; +unsigned char *record; +unsigned char *recdata; +int recsize; +unsigned char *fields; + +int donefld; /* 1 = implies rec broken into fields */ +int donerec; /* 1 = record is valid (no flds have changed) */ + +Cell **fldtab; /* room for fields */ + +static Cell dollar0 = { + OCELL, CFLD, (unsigned char*) "$0", (unsigned char *)"", 0.0, REC|STR|DONTFREE +}; +static Cell FINIT = { + OCELL, CFLD, NULL, (unsigned char*) "", 0.0, FLD|STR|DONTFREE +}; + + +static int MAXFLD; /* number of allocated fields */ +int maxfld = 0; /* last used field */ +int argno = 1; /* current input argument number */ +extern Awkfloat *ARGC; + +static void growrec(unsigned char **, int *, int, unsigned char **, int); + +char badopen[] = ":11:Cannot open %s: %s"; + +/* Dynamic field and record allocation inspired by Bell Labs awk. */ +static void morefields(void) +{ + int i; + const int n = 32; + + fldtab = realloc(fldtab, (MAXFLD + n + 1) * sizeof *fldtab); + if (fldtab == NULL) + error(MM_ERROR, ":13:Record `%.20s...' has too many fields", + record); + recloc = fldtab[0]; + for (i = MAXFLD; i < MAXFLD + n; i++) { + fldtab[i] = malloc(sizeof **fldtab); + if (fldtab[i] == NULL) + error(MM_ERROR, + ":13:Record `%.20s...' 
has too many fields", + record); + *fldtab[i] = FINIT; + } + MAXFLD += n; +} + +void fldinit(void) +{ + record = recdata = malloc(recsize = CHUNK); + fields = malloc(recsize); + if (record == NULL || fields == NULL) + error(MM_ERROR, outofspace, "fldinit"); + *record = '\0'; + morefields(); + *fldtab[0] = dollar0; +} + +void initgetrec(void) +{ + extern unsigned char **start_delayed, **after_delayed; + unsigned char **pp; + int i; + unsigned char *p; + + /* first handle delayed name=val arguments */ + for (pp = start_delayed; pp != after_delayed; pp++) + setclvar(*pp); + for (i = 1; i < *ARGC; i++) { + if (!isclvar(p = getargv(i))) /* find 1st real filename */ + return; + setclvar(p); /* a commandline assignment before filename */ + argno++; + } + infile = stdin; /* no filenames, so use stdin */ + /* *FILENAME = file = (unsigned char*) "-"; */ +} + +int getrec(unsigned char **buf, int *bufsize) +{ + int c, saved; + static int firsttime = 1; + + if (firsttime) { + firsttime = 0; + initgetrec(); + } + dprintf( ("RS=<%s>, FS=<%s>, ARGC=%d, FILENAME=%s\n", + *RS ? *RS : tostring(""), + *FS ? *FS : tostring(""), + (int) *ARGC, + *FILENAME ? 
*FILENAME : tostring("")) ); + donefld = 0; + donerec = 1; + if (*bufsize == 0) { + if ((*buf = malloc(*bufsize = CHUNK)) == NULL) + error(MM_ERROR, outofspace, "getrec"); + **buf = '\0'; + } + saved = (*buf)[0]; + (*buf)[0] = 0; + while (argno < *ARGC || infile == stdin) { + dprintf( ("argno=%d, file=|%s|\n", argno, file) ) + ; + if (infile == NULL) { /* have to open a new file */ + file = getargv(argno); + if (*file == '\0') { /* it's been zapped */ + argno++; + continue; + } + if (isclvar(file)) { /* a var=value arg */ + setclvar(file); + argno++; + continue; + } + *FILENAME = file; + dprintf( ("opening file %s\n", file) ); + if (*file == '-' && *(file+1) == '\0') + infile = stdin; + else if ((infile = fopen((char *)file, "r")) == NULL) + error(MM_ERROR, badopen, file, strerror(errno)); + setfval(fnrloc, 0.0); + } + c = readrec(buf, bufsize, infile); + if (c != 0 || (*buf)[0] != '\0') { /* normal record */ + if (*buf == record) { + if (!(recloc->tval & DONTFREE)) + xfree(recloc->sval); + recloc->sval = record; + recloc->tval = REC | STR | DONTFREE; + (void)is2number(0, recloc); + } + setfval(nrloc, nrloc->fval+1); + setfval(fnrloc, fnrloc->fval+1); + return 1; + } + /* EOF arrived on this file; set up next */ + if (infile != stdin) + fclose(infile); + infile = NULL; + argno++; + } + /* + * POSIX.2 requires that NF stick with its last value + * at the start of the END code. The most straightforward + * way to do this is to restore the contents of record + * [==buf when called from program()] so that getnf() will + * recompute the same NF value unless something strange + * occurs. This has the side effect of $0...$NF *also* + * having sticky values into END, but that seems to match + * the spirit of POSIX.2's rule for NF. 
+ */ + if (posix) + (*buf)[0] = saved; + return 0; /* true end of file */ +} + +int readrec(unsigned char **buf, int *bufsize, FILE *inf) + /* read one record into buf */ +{ + register int sep, c, k, m, n; + unsigned char *rr; + register int nrr; + wchar_t wc; + + next(wc, *RS, n); + if ((sep = **RS) == 0) { + sep = '\n'; + while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ + ; + if (c != EOF) + ungetc(c, inf); + } + if (*bufsize == 0) + growrec(buf, bufsize, CHUNK, NULL, 0); + for (rr = *buf, nrr = *bufsize; ; ) { + cont: for (; (c=getc(inf)) != sep && c != EOF; *rr++ = c) + if (--nrr < n + 3) { + growrec(buf, bufsize, *bufsize + CHUNK, &rr, 0); + nrr += CHUNK; + } + if (c != EOF) { + /* + * Note: This code does not restrict occurences of + * the multibyte sequence in RS to the start of an + * input character. + */ + for (m = 1; m < n; m++) { + if ((c = getc(inf)) == EOF || c != (*RS)[m]) { + for (k = 0; k < m; k++) + *rr++ = (*RS)[k]; + nrr -= k; + if (c == EOF) + break; + *rr++ = c; + nrr--; + goto cont; + } + } + } + if (**RS == sep || c == EOF) + break; + if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ + break; + *rr++ = '\n'; + *rr++ = c; + } + /*if (rr > *buf + *bufsize) + error(MM_ERROR, ":12:Input record `%.20s...' too long", *buf);*/ + *rr = 0; + dprintf( ("readrec saw <%s>, returns %d\n", *buf, c == EOF + && rr == *buf ? 0 : 1) ); + return c == EOF && rr == *buf ? 
0 : 1; +} + +unsigned char *getargv(int n) /* get ARGV[n] */ +{ + Cell *x; + unsigned char *s, temp[25]; + extern Array *ARGVtab; + + snprintf((char *)temp, sizeof temp, "%d", n); + x = setsymtab(temp, "", 0.0, STR, ARGVtab); + s = getsval(x); + dprintf( ("getargv(%d) returns |%s|\n", n, s) ); + return s; +} + +void setclvar(unsigned char *s) /* set var=value from s */ +{ + unsigned char *p; + Cell *q; + + for (p=s; *p != '='; p++) + ; + *p++ = 0; + p = qstring(p, '\0'); + q = setsymtab(s, p, 0.0, STR, symtab); + setsval(q, p); + (void)is2number(0, q); + dprintf( ("command line set %s to |%s|\n", s, p) ); +} + +static void cleanfld(int n1, int n2); +static int refldbld(unsigned char *rec, unsigned char *fs); + +void +fldbld(void) +{ + register unsigned char *r, *fr; + Cell **p; + wchar_t wc, sep; + int i, n; + + if (donefld) + return; + if (!(recloc->tval & STR)) + getsval(recloc); + r = recloc->sval; /* was record! */ + fr = fields; + i = 0; /* number of fields accumulated here */ + if ((sep = **FS) != '\0' && (next(sep, *FS, n), (*FS)[n] != '\0')) { + /* it's a regular expression */ + i = refldbld(r, *FS); + } else if (sep == ' ') { + for (i = 0; ; ) { + while (*r == ' ' || *r == '\t' || *r == '\n') + r++; + if (*r == 0) + break; + i++; + if (i >= MAXFLD) + morefields(); + if (!(fldtab[i]->tval & DONTFREE)) + xfree(fldtab[i]->sval); + fldtab[i]->sval = fr; + fldtab[i]->tval = FLD | STR | DONTFREE; + next(wc, r, n); + do { + do + *fr++ = *r++; + while (--n); + next(wc, r, n); + } while (wc != ' ' && wc != '\t' && wc != '\n' && + wc != '\0'); + *fr++ = 0; + } + *fr = 0; + } else if (*r != 0) { /* if 0, it's a null field */ + for (;;) { + i++; + if (i >= MAXFLD) + morefields(); + if (!(fldtab[i]->tval & DONTFREE)) + xfree(fldtab[i]->sval); + fldtab[i]->sval = fr; + fldtab[i]->tval = FLD | STR | DONTFREE; + while (next(wc, r, n), + wc != sep && wc != '\n' && wc != '\0') { + /* \n always a separator */ + do + *fr++ = *r++; + while (--n); + } + *fr++ = '\0'; + if (wc 
== '\0') + break; + r += n; + } + *fr = 0; + } + /*if (i >= MAXFLD) + error(MM_ERROR, ":13:Record `%.20s...' has too many fields", + record);*/ + /* clean out junk from previous record */ + cleanfld(i, maxfld); + maxfld = i; + donefld = 1; + for (p = &fldtab[1]; p <= &fldtab[0]+maxfld; p++) + (void)is2number(0, *p); + setfval(nfloc, (Awkfloat) maxfld); + if (dbg) + for (p = &fldtab[0]; p <= &fldtab[0]+maxfld; p++) + pfmt(stdout, MM_INFO, ":14:field %d: |%s|\n", p-&fldtab[0], + (*p)->sval); +} + +static void cleanfld(int n1, int n2) /* clean out fields n1..n2 inclusive */ +{ + static unsigned char *nullstat = (unsigned char *) ""; + register Cell **p, **q; + + for (p = &fldtab[n2], q = &fldtab[n1]; p > q; p--) { + if (!((*p)->tval & DONTFREE)) + xfree((*p)->sval); + (*p)->tval = FLD | STR | DONTFREE; + (*p)->sval = nullstat; + } +} + +void newfld(int n) /* add field n (after end) */ +{ + /*if (n >= MAXFLD) + error(MM_ERROR, ":15:Creating too many fields", record);*/ + while (n >= MAXFLD) + morefields(); + cleanfld(maxfld, n); + maxfld = n; + setfval(nfloc, (Awkfloat) n); +} + +static int refldbld(unsigned char *rec, + unsigned char *fs) /* build fields from reg expr in FS */ +{ + unsigned char *fr; + int i; + fa *pfa; + + fr = fields; + *fr = '\0'; + if (*rec == '\0') + return 0; + pfa = makedfa(fs, 1); + dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, + fs) ); + pfa->notbol = 0; + for (i = 1; ; i++) { + if (i >= MAXFLD) + morefields(); + if (!(fldtab[i]->tval & DONTFREE)) + xfree(fldtab[i]->sval); + fldtab[i]->tval = FLD | STR | DONTFREE; + fldtab[i]->sval = fr; + dprintf( ("refldbld: i=%d\n", i) ); + if (nematch(pfa, rec)) { + pfa->notbol = REG_NOTBOL; + dprintf( ("match %s (%d chars\n", + patbeg, patlen) ); + strncpy((char*) fr, (char*) rec, patbeg-rec); + fr += patbeg - rec + 1; + *(fr-1) = '\0'; + rec = patbeg + patlen; + } else { + dprintf( ("no match %s\n", rec) ); + strcpy((char*) fr, (char*) rec); + pfa->notbol = 0; + break; + } + } + return i; +} 
+ +void recbld(void) +{ + int i; + unsigned char *r, *p; + + if (donerec == 1) + return; + r = recdata; + for (i = 1; i <= *NF; i++) { + p = getsval(fldtab[i]); + while ((*r = *p++)) { + if (++r >= &recdata[recsize]) { + recsize += CHUNK; + growrec(&recdata, &recsize, recsize, &r, 1); + } + } + if (i < *NF) + for ((p = *OFS); (*r = *p++); ) { + if (++r >= &recdata[recsize]) { + recsize += CHUNK; + growrec(&recdata, &recsize, + recsize, &r, 1); + } + } + } + *r = '\0'; + dprintf( ("in recbld FS=%o, recloc=%lo\n", **FS, + (long)recloc) ); + recloc->tval = REC | STR | DONTFREE; + recloc->sval = record = recdata; + dprintf( ("in recbld FS=%o, recloc=%lo\n", **FS, + (long)recloc) ); + dprintf( ("recbld = |%s|\n", record) ); + donerec = 1; +} + +Cell *fieldadr(int n) +{ + if (n < 0) + error(MM_ERROR, ":17:Trying to access field %d", n); + while (n >= MAXFLD) + morefields(); + return(fldtab[n]); +} + +int errorflag = 0; +char errbuf[200]; + +static int been_here = 0; +static char + atline[] = ":18: at source line %d", + infunc[] = ":19: in function %s"; + +void +vyyerror(const char *msg, ...) 
+{ + extern unsigned char *curfname; + va_list args; + + if (been_here++ > 2) + return; + va_start(args, msg); + vpfmt(stderr, MM_ERROR, msg, args); + pfmt(stderr, MM_NOSTD, atline, lineno); + if (curfname != NULL) + pfmt(stderr, MM_NOSTD, infunc, curfname); + fprintf(stderr, "\n"); + errorflag = 2; + eprint(); + + va_end(args); +} + +void +yyerror(char *s) +{ + extern unsigned char /**cmdname,*/ *curfname; + static int been_here = 0; + + if (been_here++ > 2) + return; + pfmt(stderr, (MM_ERROR | MM_NOGET), "%s", s); + pfmt(stderr, MM_NOSTD, atline, lineno); + if (curfname != NULL) + pfmt(stderr, MM_NOSTD, infunc, curfname); + fprintf(stderr, "\n"); + errorflag = 2; + eprint(); +} + +/*ARGSUSED*/ +void fpecatch(int signo) +{ + error(MM_ERROR, ":20:Floating point exception"); +} + +extern int bracecnt, brackcnt, parencnt; +static void bcheck2(int n, int c1, int c2); + +void bracecheck(void) +{ + int c; + static int beenhere = 0; + + if (beenhere++) + return; + while ((c = awk_input()) != EOF && c != '\0') + bclass(c); + bcheck2(bracecnt, '{', '}'); + bcheck2(brackcnt, '[', ']'); + bcheck2(parencnt, '(', ')'); +} + +static void bcheck2(int n, int c1, int c2) +{ + if (n == 1) + pfmt(stderr, MM_ERROR, ":21:Missing %c\n", c2); + else if (n > 1) + pfmt(stderr, MM_ERROR, ":22:%d missing %c's\n", n, c2); + else if (n == -1) + pfmt(stderr, MM_ERROR, ":23:Extra %c\n", c2); + else if (n < -1) + pfmt(stderr, MM_ERROR, ":24:%d extra %c's\n", -n, c2); +} + +void +error(int flag, const char *msg, ...) 
+{ + int errline; + extern Node *curnode; + /*extern unsigned char *cmdname;*/ + va_list args; + + fflush(stdout); + va_start(args, msg); + vpfmt(stderr, flag, msg, args); + putc('\n', stderr); + + if (compile_time != 2 && NR && *NR > 0) { + pfmt(stderr, MM_INFO, + ":25:Input record number %g", *FNR); + if (strcmp((char*) *FILENAME, "-") != 0) + pfmt(stderr, MM_NOSTD, + ":26:, file %s", *FILENAME); + fprintf(stderr, "\n"); + } + errline = 0; + if (compile_time != 2 && curnode) + errline = curnode->lineno; + else if (compile_time != 2 && lineno) + errline = lineno; + if (errline) + pfmt(stderr, MM_INFO, ":27:Source line number %d\n", errline); + eprint(); + if (flag == MM_ERROR) { + if (dbg) + abort(); + exit(2); + } + va_end(args); +} + +static void eprint(void) /* try to print context around error */ +{ + unsigned char *p, *q, *r; + int c, episnul; + static int been_here = 0; + extern unsigned char ebuf[300], *ep; + + if (compile_time == 2 || compile_time == 0 || been_here++ > 0) + return; + episnul = ep > ebuf && ep[-1] == '\0'; + p = ep - 1 - episnul; + if (p > ebuf && *p == '\n') + p--; + for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--) + ; + while (*p == '\n') + p++; + if (0 /* posix */) + pfmt(stderr, MM_INFO, ":28:Context is\n\t"); + else + pfmt(stderr, MM_INFO|MM_NOSTD, ":2228: context is\n\t"); + for (q=ep-1-episnul; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--) + ; + for (r = q; r < ep; r++) { + if (*r != ' ' && *r != '\t' && *r != '\n') { + for ( ; p < q; p++) + if (*p) + putc(*p, stderr); + break; + } + } + fprintf(stderr, " >>> "); + for ( ; p < ep; p++) + if (*p) + putc(*p, stderr); + fprintf(stderr, " <<< "); + if (*ep) + while ((c = awk_input()) != '\n' && c != '\0' && c != EOF) { + putc(c, stderr); + bclass(c); + } + putc('\n', stderr); + ep = ebuf; +} + +void bclass(int c) +{ + switch (c) { + case '{': bracecnt++; break; + case '}': bracecnt--; break; + case '[': brackcnt++; break; + case ']': brackcnt--; break; + case '(': parencnt++; break; + 
case ')': parencnt--; break; + } +} + +double errcheck(double x, unsigned char *s) +{ + if (errno == EDOM) { + errno = 0; + error(MM_WARNING, ":29:%s argument out of domain", s); + x = 1; + } else if (errno == ERANGE) { + errno = 0; + error(MM_WARNING, ":30:%s result out of range", s); + x = 1; + } + return x; +} + +void PUTS(unsigned char *s) { + dprintf( ("%s\n", s) ); +} + +int isclvar(unsigned char *s) /* is s of form var=something? */ +{ + unsigned char *os = s; + + for ( ; *s; s++) + if (!(alnumchar(*s) || *s == '_')) + break; + return *s == '=' && s > os && *(s+1) != '=' && !digitchar(*os); +} + +int is2number(register unsigned char *s, Cell *p) +{ + unsigned char *after; + Awkfloat val; + + /* + * POSIX.2 says leading <blank>s are skipped and that + * <blank> is at least ' ' and '\t' and can include other + * characters, but not in the "POSIX" (aka "C") locale. + * + * The historic code skipped those two and newline. So, + * unless it's noticed by some test suite, we choose to + * keep things compatible. To be safe, reject the string + * if it starts with other white space characters since + * strtod() skips any form of white space. + * + * Permit similarly spelled trailing white space for + * compatibility. + */ + if (p != 0) + s = p->sval; + while (*s == ' ' || *s == '\t' || *s == '\n') + s++; + if (isspace(*s)) + return 0; + /* + * Reject hexadecimal numbers, infinity and NaN strings which + * are recognized by C99 strtod() implementations. 
+ */ + switch (*s) { + case '0': + if (s[1] == 'x' || s[1] == 'X') + return 0; + break; + case 'i': + case 'I': + if (strncasecmp((char *)s, "inf", 3) == 0) + return 0; + break; + case 'n': + case 'N': + if (strncasecmp((char *)s, "NaN", 3) == 0) + return 0; + break; + } + val = strtod((char *)s, (char **)&after); + for (s = after; *s == ' ' || *s == '\t' || *s == '\n'; s++) + ; + if (*s != '\0') + return 0; + if (p != 0) { + p->fval = val; + p->tval |= NUM; + } + return 1; +} + +double +awk_atof(const char *s) +{ + wchar_t wc; + int n; + + while (*s) { + next(wc, s, n); + if (!(mb_cur_max > 1 ? iswspace(wc) : isspace(wc))) + break; + s += n; + } + /* + * Return 0 for hexadecimal numbers, infinity and NaN strings which + * are recognized by C99 atof() implementations. + */ + switch (*s) { + case '0': + if (s[1] == 'x' || s[1] == 'X') + return 0; + break; + case 'i': + case 'I': + if (strncasecmp(s, "inf", 3) == 0) + return 0; + break; + case 'n': + case 'N': + if (strncasecmp(s, "NaN", 3) == 0) + return 0; + break; + } + return atof(s); +} + +unsigned char *makerec(const unsigned char *data, int size) +{ + if (!(recloc->tval & DONTFREE)) + xfree(recloc->sval); + if (recsize < size) + growrec(&recdata, &recsize, size, NULL, 0); + record = recdata; + strcpy((char*)record, (char*)data); + recloc->sval = record; + recloc->tval = REC | STR | DONTFREE; + donerec = 1; donefld = 0; + return record; +} + +static void growrec(unsigned char **buf, int *bufsize, int newsize, + unsigned char **ptr, int bld) +{ + unsigned char *np, *op; + + op = *buf; + if ((np = realloc(op, *bufsize = newsize)) == 0) { + oflo: if (bld) + error(MM_ERROR, + ":16:Built giant record `%.20s...'", + *buf); + else + error(MM_ERROR, + ":12:Input record `%.20s...' 
too long", + *buf); + } + if (ptr && *ptr) + *ptr = &np[*ptr - op]; + if (record == op) + record = np; + if (recdata == op) { + recdata = np; + recsize = *bufsize; + if ((fields = realloc(fields, recsize)) == NULL) + goto oflo; + } + if (fldtab[0]->sval == op) + fldtab[0]->sval = np; + if (recloc->sval == op) + recloc->sval = np; + *buf = np; +} + +int +vpfmt(FILE *stream, long flags, const char *fmt, va_list ap) +{ + extern char *pfmt_label__; + int n = 0; + + if ((flags & MM_NOGET) == 0) { + if (*fmt == ':') { + do + fmt++; + while (*fmt != ':'); + fmt++; + } + } + if ((flags & MM_NOSTD) == 0) + n += fprintf(stream, "%s: ", pfmt_label__); + if ((flags & MM_ACTION) == 0 && isupper(*fmt&0377)) + n += fprintf(stream, "%c", tolower(*fmt++&0377)); + n += vfprintf(stream, fmt, ap); + return n; +} diff --git a/nawk/main.c b/nawk/main.c @@ -0,0 +1,215 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)main.c 1.14 (gritter) 12/19/04> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. 
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/main.c /main/uw7_nj/2 */ +/* from RCS Header: main.c 1.3 91/08/12 */ + +#define DEBUG +#include <stdio.h> +#include <ctype.h> +#include <signal.h> +#include <pfmt.h> +#include <errno.h> +#include <string.h> +#include <locale.h> +#include <langinfo.h> +#include <libgen.h> + +#define CMDCLASS ""/*"UX:"*/ /* Command classification */ + +#include <locale.h> + +#include "awk.h" +#include "y.tab.h" + +int dbg = 0; +unsigned char *cmdname; /* gets argv[0] for error messages */ +extern FILE *yyin; /* lex input file */ +static FILE *awk_yyin; +extern FILE *yyout; +unsigned char *lexprog; /* points to program argument if it exists */ +unsigned char **start_delayed; /* first name=val argument delayed for BEGIN code */ +unsigned char **after_delayed; /* first argument after the delayed name=val's */ +extern int errorflag; /* non-zero if any syntax errors; set by yyerror */ +int compile_time = 2; /* for error printing: */ + /* 2 = cmdline, 1 = compile, 0 = running */ + +#define MAXPFILE 100 +unsigned char *pfile[MAXPFILE]; /* program filenames from -f's */ +int npfile = 0; /* number of filenames */ +int curpfile = 0; /* current filename */ + +int mb_cur_max; /* MB_CUR_MAX, for acceleration */ + +extern const char badopen[]; + +int main(int argc, unsigned char *argv[], unsigned char *envp[]) +{ + unsigned char *fs = NULL; + char label[MAXLABEL+1]; /* Space for the catalogue label */ + + (void)setlocale(LC_COLLATE, ""); + (void)setlocale(LC_CTYPE, ""); + /*(void)setlocale(LC_MESSAGES, "");*/ + (void)setlocale(LC_NUMERIC, "POSIX"); /* redundant */ + mb_cur_max = MB_CUR_MAX; + cmdname = (unsigned char *)basename((char *)argv[0]); + (void)strcpy(label, CMDCLASS); + (void)strncat(label, (char*) cmdname, (MAXLABEL - sizeof(CMDCLASS) - 1)); + (void)setcat("uxawk"); + (void)setlabel(label); + /*version = (char*) gettxt(":31", "version Oct 11, 1989");*/ + if (argc 
== 1) { + if (0 /* posix */) + pfmt(stderr, MM_ERROR, ":32:Incorrect usage\n"); + pfmt(stderr, MM_ACTION | (0 /* posix */ ? 0 : MM_NOSTD), + ":210107:Usage: %s [-f programfile | 'program'] [-Ffieldsep] [-v var=value] [files]\n", + cmdname); + exit(1); + } + signal(SIGFPE, fpecatch); + awk_yyin = NULL; + yyout = stdout; + fldinit(); + syminit(); + while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') { + if (strcmp((char*) argv[1], "--") == 0) { /* explicit end of args */ + argc--; + argv++; + break; + } + switch (argv[1][1]) { + case 'f': /* next argument is program filename */ + if (npfile >= MAXPFILE) + error(MM_ERROR, ":106:Too many program filenames"); + if (argv[1][2] != '\0') { /* arg is -fname */ + pfile[npfile++] = &argv[1][2]; + } else { + argc--; + argv++; + if (argc <= 1) + error(MM_ERROR, ":34:No program filename"); + pfile[npfile++] = argv[1]; + } + break; + case 'F': /* set field separator */ + if (argv[1][2] != 0) { /* arg is -Fsomething */ + if (argv[1][2] == 't' && argv[1][3] == 0) /* wart: t=>\t */ + fs = (unsigned char *) "\t"; + else if (argv[1][2] != 0) + fs = &argv[1][2]; + } else { /* arg is -F something */ + argc--; argv++; + if (argc > 1 && argv[1][0] == 't' && argv[1][1] == 0) /* wart: t=>\t */ + fs = (unsigned char *) "\t"; + else if (argc > 1 && argv[1][0] != 0) + fs = &argv[1][0]; + } + if (fs == NULL || *fs == '\0') + error(MM_WARNING, ":35:Field separator FS is empty"); + break; + case 'v': /* -v a=1 to be done NOW. 
one -v for each */ + if (argv[1][2] != '\0') { /* arg is -va=1 */ + if (!isclvar(&argv[1][2])) + error(MM_ERROR, ":105:malformed -v assignment"); + setclvar(&argv[1][2]); + } else if (--argc > 1 && isclvar((++argv)[1])) { + setclvar(argv[1]); + } else { + error(MM_ERROR, ":105:malformed -v assignment"); + } + break; + case 'd': + dbg = atoi((char *)&argv[1][2]); + if (dbg == 0) + dbg = 1; + pfmt(stdout, (MM_INFO | MM_NOGET), "%s %s\n", + cmdname, version); + break; + default: + pfmt(stderr, MM_WARNING, + ":36:Unknown option %s ignored\n", argv[1]); + break; + } + argc--; + argv++; + } + /* argv[1] is now the first argument */ + if (npfile == 0) { /* no -f; first argument is program */ + if (argc <= 1) + error(MM_ERROR, ":37:No program given"); + dprintf( ("program = |%s|\n", argv[1]) ); + lexprog = argv[1]; + argc--; + argv++; + } + /* hold leading name=val arguments until just after BEGIN */ + if (posix && argc > 1 && isclvar(argv[1])) { + start_delayed = &argv[0]; + do { + argv[0] = argv[1]; + argv++; + } while (--argc > 1 && isclvar(argv[1])); + after_delayed = &argv[0]; + } + compile_time = 1; + argv[0] = cmdname; /* put prog name at front of arglist */ + dprintf( ("argc=%d, argv[0]=%s\n", argc, argv[0]) ); + arginit(argc, argv); + envinit(envp); + yyparse(); + if (fs) + *FS = tostring(qstring(fs, '\0')); + dprintf( ("errorflag=%d\n", errorflag) ); + if (errorflag == 0) { + compile_time = 0; + (void)setlocale(LC_NUMERIC, ""); + run(winner); + } else + bracecheck(); + exit(errorflag); +} + +int pgetc(void) /* get program character */ +{ + int c; + + for (;;) { + if (awk_yyin == NULL) { + if (curpfile >= npfile) + return EOF; + if (!strcmp((char *)pfile[curpfile], "-")) + awk_yyin = stdin; + else if ((awk_yyin = fopen((char *) pfile[curpfile], "r")) == NULL) + error(MM_ERROR, badopen, + pfile[curpfile], strerror(errno)); + } + if ((c = getc(awk_yyin)) != EOF) + return c; + awk_yyin = NULL; + curpfile++; + } +} diff --git a/nawk/maketab.c b/nawk/maketab.c @@ -0,0 
+1,177 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)maketab.c 1.11 (gritter) 12/4/04> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/maketab.c /main/uw7_nj/1 */ +/* from RCS Header: maketab.c 1.2 91/06/25 */ +static const char sccsid[] = "@(#)maketab.c 1.11 (gritter) 12/4/04"; + +#include <stdio.h> +#include <string.h> +#include "awk.h" +#include "y.tab.h" + +struct xx +{ int token; + char *name; + char *pname; +} proc[] = { + { PROGRAM, "program", NULL }, + { BOR, "boolop", " || " }, + { AND, "boolop", " && " }, + { NOT, "boolop", " !" 
}, + { NE, "relop", " != " }, + { EQ, "relop", " == " }, + { LE, "relop", " <= " }, + { LT, "relop", " < " }, + { GE, "relop", " >= " }, + { GT, "relop", " > " }, + { ARRAY, "array", NULL }, + { INDIRECT, "indirect", "$(" }, + { SUBSTR, "substr", "substr" }, + { SUB, "sub", "sub" }, + { GSUB, "gsub", "gsub" }, + { INDEX, "sindex", "sindex" }, + { SPRINTF, "awsprintf", "sprintf " }, + { ADD, "arith", " + " }, + { MINUS, "arith", " - " }, + { MULT, "arith", " * " }, + { DIVIDE, "arith", " / " }, + { MOD, "arith", " % " }, + { UMINUS, "arith", " -" }, + { POWER, "arith", " **" }, + { PREINCR, "incrdecr", "++" }, + { POSTINCR, "incrdecr", "++" }, + { PREDECR, "incrdecr", "--" }, + { POSTDECR, "incrdecr", "--" }, + { CAT, "cat", " " }, + { PASTAT, "pastat", NULL }, + { PASTAT2, "dopa2", NULL }, + { MATCH, "matchop", " ~ " }, + { NOTMATCH, "matchop", " !~ " }, + { MATCHFCN, "matchop", "matchop" }, + { INTEST, "intest", "intest" }, + { PRINTF, "aprintf", "printf" }, + { PRINT, "print", "print" }, + { DELETE, "delete", "delete" }, + { SPLIT, "split", "split" }, + { ASSIGN, "assign", " = " }, + { ADDEQ, "assign", " += " }, + { SUBEQ, "assign", " -= " }, + { MULTEQ, "assign", " *= " }, + { DIVEQ, "assign", " /= " }, + { MODEQ, "assign", " %= " }, + { POWEQ, "assign", " ^= " }, + { CONDEXPR, "condexpr", " ?: " }, + { IF, "ifstat", "if(" }, + { WHILE, "whilestat", "while(" }, + { FOR, "forstat", "for(" }, + { DO, "dostat", "do" }, + { IN, "instat", "instat" }, + { NEXT, "jump", "next" }, + { EXIT, "jump", "exit" }, + { BREAK, "jump", "break" }, + { CONTINUE, "jump", "continue" }, + { RETURN, "jump", "ret" }, + { BLTIN, "bltin", "bltin" }, + { CALL, "call", "call" }, + { ARG, "arg", "arg" }, + { VARNF, "getnf", "NF" }, + { GETLINE, "getline", "getline" }, + { 0, "", "" }, +}; + +#define SIZE LASTTOKEN - FIRSTTOKEN + 1 +char *table[SIZE]; +char *names[SIZE]; + +int main(void) +{ + struct xx *p; + int i, n, tok; + char c; + FILE *fp; + char buf[100], name[100], def[100]; + + 
printf("#include <stdio.h>\n"); + printf("#include \"awk.h\"\n"); + printf("#include \"y.tab.h\"\n\n"); +/* printf("Cell *nullproc();\n"); + for (i = SIZE; --i >= 0; ) + names[i] = ""; + for (p=proc; p->token!=0; p++) + if (p == proc || strcmp(p->name, (p-1)->name)) + printf("extern Cell *%s();\n", p->name);*/ + + if ((fp = fopen("y.tab.h", "r")) == NULL) { + fprintf(stderr, "maketab can't open y.tab.h!\n"); + exit(1); + } + printf("static unsigned char *printname[%d] = {\n", SIZE); + i = 0; + while (fgets(buf, sizeof buf, fp) != NULL) { + if (*buf == '\n') + continue; + n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok); + if ((c != '#' || n != 4) && (strcmp(def,"define") != 0)) /* not a valid #define */ + continue; + if (strncmp(name, "YY", 2) == 0 || strncmp(name, "yy", 2) == 0) + continue; + if (tok < FIRSTTOKEN || tok > LASTTOKEN) { + continue; + /* + fprintf(stderr, "maketab funny token %d %s\n", tok, buf); + exit(1); + */ + } + names[tok-FIRSTTOKEN] = (char *) malloc(strlen(name)+1); + strcpy(names[tok-FIRSTTOKEN], name); + printf("\t(unsigned char *) \"%s\",\t/* %d */\n", name, tok); + i++; + } + printf("};\n\n"); + + for (p=proc; p->token!=0; p++) + table[p->token-FIRSTTOKEN] = p->name; + printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE); + for (i=0; i<SIZE; i++) + if (table[i]==0) + printf("\tnullproc,\t/* %s */\n", names[i]); + else + printf("\t%s,\t/* %s */\n", table[i], names[i]); + printf("};\n\n"); + + printf("unsigned char *tokname(int n)\n"); /* print a tokname() function */ + + printf("{\n"); + printf(" static unsigned char buf[100];\n\n"); + printf(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n"); + printf(" snprintf((char *)buf, sizeof buf, \"token %%d\", n);\n"); + printf(" return buf;\n"); + printf(" }\n"); + printf(" return printname[n-257];\n"); + printf("}\n"); + exit(0); +} diff --git a/nawk/mkfile b/nawk/mkfile @@ -0,0 +1,53 @@ +BIN = nawk +OBJ = awk.lx.o b.o lib.o main.o parse.o proctab.o run.o tran.o \ + awk.g.o version.o 
+LOCAL_CFLAGS = -DSU3 +LOCAL_LDFLAGS = -lm +CLEAN_FILES = maketab maketab.o awk.g.c y.tab.h awk.lx.c proctab.c awk.1 +DEPS = libcommon libuxre lex yacc ed + +<$mkbuild/mk.common + +INSTALL_BIN = nawk +INSTALL_MAN1 = nawk.1 +INSTALL_SYMLINK = \ + nawk $BINDIR/awk \ + nawk.1 $MANDIR/man1/awk.1 + +nawk: $OBJ + +awk.g.c:Q: awk.g.y + echo YACC $target + $YACC -d awk.g.y + mv -f y.tab.c awk.g.c + +y.tab.h:Q: awk.g.c + (echo '1i'; echo '#include <inttypes.h>'; echo '.'; echo 'w';) | \ + $ED -s y.tab.h + +maketab:Q: maketab.o + echo CC $target + $CC $LDFLAGS maketab.o -o maketab + +proctab.c: maketab + ./maketab > proctab.c + +awk.lx.c:Q: awk.lx.l + echo LEX $target + $LEX -t awk.lx.l > awk.lx.c + +maketab.o:Q: maketab.c + echo CC $target + $CC -DIN_MAKETAB -c maketab.c + +awk.g.o: awk.h y.tab.h +awk.lx.o: awk.h y.tab.h +b.o: awk.h y.tab.h +lib.o: awk.h y.tab.h +main.o: awk.h y.tab.h +maketab.o: awk.h y.tab.h +parse.o: awk.h y.tab.h +proctab.o: awk.h y.tab.h +run.o: awk.h y.tab.h +tran.o: awk.h y.tab.h +version.o: awk.h y.tab.h diff --git a/nawk/nawk.1 b/nawk/nawk.1 @@ -0,0 +1,585 @@ +.\" +.\" Sccsid @(#)nawk.1 1.21 (gritter) 2/6/05 +.\" Derived from awk.1, Bell Labs: +.\" +.\" Copyright (C) Lucent Technologies 1997 +.\" All Rights Reserved +.\" +.\" Permission to use, copy, modify, and distribute this software and +.\" its documentation for any purpose and without fee is hereby +.\" granted, provided that the above copyright notice appear in all +.\" copies and that both that the copyright notice and this +.\" permission notice and warranty disclaimer appear in supporting +.\" documentation, and that the name Lucent Technologies or any of +.\" its entities not be used in advertising or publicity pertaining +.\" to distribution of the software without specific, written prior +.\" permission. +.\" +.\" LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +.\" INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 
+.\" IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +.\" SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +.\" IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +.\" ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +.\" THIS SOFTWARE. +.TH NAWK 1 "2/6/05" "Heirloom Toolchest" "User Commands" +.SH NAME +nawk \- pattern scanning and processing language +.SH SYNOPSIS +.HP +.ad l +\fBnawk\fR +[\fB\-f \fIprogfile\fR | \fI'prog'\fR] +[\fB\-F\fIfieldsep\fR] +[\fB\-v \fIvar=value\fR] +[\fIfile . . .\fR] +.br +.ad b +.SH DESCRIPTION +.I Nawk +scans each input +.I file +for lines that match any of a set of patterns specified literally in +.IR prog +or in one or more files +specified as +.B \-f +.IR progfile . +With each pattern +there can be an associated action that will be performed +when a line of a +.I file +matches the pattern. +Each line is matched against the +pattern portion of every pattern-action statement; +the associated action is performed for each matched pattern. +The file name +.B \- +means the standard input. +Any +.IR file +of the form +.I var=value +is treated as an assignment, not a filename, +and is executed at the time it would have been opened if it were a filename +.RB ( /usr/5bin/s42/awk , +.BR /usr/5bin/posix/awk , +and +.B /usr/5bin/posix2001/awk +only). +The option +.B \-v +followed by +.I var=value +is an assignment to be done before +.I prog +is executed; +any number of +.B \-v +options may be present. +The +.B \-F +.IR fs +option defines the input field separator to be the regular expression +.IR fs. +.PP +An input line is normally made up of fields separated by white space, +or by regular expression +.BR FS . +The fields are denoted +.BR $1 , +.BR $2 , +\&..., while +.B $0 +refers to the entire line. 
+.PP +A pattern-action statement has the form +.IP +.IB pattern " { " action " }" +.PP +A missing +.BI { " action " } +means print the line; +a missing pattern always matches. +Pattern-action statements are separated by newlines or semicolons. +.PP +An action is a sequence of statements. +A statement can be one of the following: +.PP +.\".ta \w'\f(CWdelete array[subscript]'u +.RS +.nf +\fBif (\fI expression \fB)\fI statement \fR[ \fBelse\fI statement \fR] +\fBwhile (\fI expression \fB)\fI statement\fR +\fBfor (\fI expression \fB;\fI expression \fB;\fI expression \fB)\fI statement\fR +\fBfor (\fI var \fBin\fI array \fB)\fI statement\fR +\fBdo\fI statement \fBwhile (\fI expression \fB)\fR +\fBbreak\fR +\fBcontinue\fR +\fB{\fR [\fIstatement \fR...] \fB}\fR +\fIexpression\fR # commonly \fIvar \fB=\fI expression\fR +\fBprint\fR [\fIexpression-list\fR] [\fB>\fI expression\fR] +\fBprintf\fI format \fR[\fB,\fI expression-list\fR] [\fB>\fI expression\fR] +\fBnext \fR# skip remaining patterns on this input line +\fBdelete\fI array\fB[\fIsubscript\fB]\fR # delete an array element +\fBexit\fR [\fIexpr\fR] # exit immediately; status is \fIexpr\fR +\fBreturn\fR [\fIexpr\fR] +.fi +.RE +.br +.DT +.PP +Statements are terminated by +semicolons, newlines or right braces. +An empty +.I expression-list +stands for +.BR $0 . +String constants are quoted \&\f(CW"\ "\fR, +with the usual C escapes recognized within. +Expressions take on string or numeric values as appropriate, +and are built using the operators +.B + \- * / % ^ +(exponentiation), and concatenation (indicated by white space). +The operators +.B ! ++ \-\- += \-= *= +.B /= %= ^= > >= < +.B <= == != ?: +are also available in expressions. +Variables may be scalars, array elements +(denoted \fIx\fB[\fIi\fB]\fR) +or fields. +Variables are initialized to the null string. +Array subscripts may be any string, +not necessarily numeric; +this allows for a form of associative memory. 
+Multiple subscripts such as +\fB[\fIi\fB,\fIj\fB,\fIk\fB]\fR +are permitted; the constituents are concatenated, +separated by the value of +.BR SUBSEP . +.PP +The +.B print +statement prints its arguments on the standard output +(or on a file if +.BI > file +or +.BI >> file +is present or on a pipe if +.BI | cmd +is present), separated by the current output field separator, +and terminated by the output record separator. +.I file +and +.I cmd +may be literal names or parenthesized expressions; +identical string values in different statements denote +the same open file. +The +.B printf +statement formats its expression list according to the format +(see +.IR printf (3)) . +The built-in function +.BI close( expr ) +closes the file or pipe +.IR expr . +.PP +The mathematical functions +.BR exp , +.BR log , +.BR sqrt , +.BR sin , +.BR cos , +and +.BR atan2 +are built in. +Other built-in functions: +.\".TF length +.TP +.B gsub +same as +.B sub +except that all occurrences of the regular expression +are replaced; +.B sub +and +.B gsub +return the number of replacements. +.TP +.BI index( s , " t" ) +the position in +.I s +where the string +.I t +occurs, or 0 if it does not. +.TP +.B int +truncates to an integer value +.TP +.B length +the length of its argument +taken as a string, +or of +.B $0 +if no argument. +.TP +.BI match( s , " r" ) +the position in +.I s +where the regular expression +.I r +occurs, or 0 if it does not. +The variables +.B RSTART +and +.B RLENGTH +are set to the position and length of the matched string. +.TP +.B rand +random number on (0,1) +.TP +\fBsplit(\fIs\fB, \fIa\fB, \fIfs\fB)\fR +splits the string +.I s +into array elements +.IB a [1] , +.IB a [2] , +\&..., +.IB a [ n ] , +and returns +.IR n . +The separation is done with the regular expression +.I fs +or with the field separator +.B FS +if +.I fs +is not given. +.TP +\fBsprintf(\fIfmt\fB, \fIexpr\fB, \fI...\fB)\fR +the string resulting from formatting +.I expr ... 
+according to the +.IR printf (3) +format +.I fmt +.TP +.B srand +sets seed for +.B rand +and returns the previous seed. +.TP +\fBsub(\fIr\fB, \fIt\fB, \fIs\fB)\fR +substitutes +.I t +for the first occurrence of the regular expression +.I r +in the string +.IR s . +If +.I s +is not given, +.B $0 +is used. +.TP +\fBsubstr(\fIs\fB, \fIm\fB, \fIn\fB)\fR +the +.IR n -character +substring of +.I s +that begins at position +.IR m +counted from 1. +.TP +.BI system( cmd ) +executes +.I cmd +and returns its exit status +.TP +.BI tolower( str ) +returns a copy of +.I str +with all upper-case characters translated to their +corresponding lower-case equivalents. +.TP +.BI toupper( str ) +returns a copy of +.I str +with all lower-case characters translated to their +corresponding upper-case equivalents. +.PD +.PP +The ``function'' +.B getline +sets +.B $0 +to the next input record from the current input file; +.B getline +.BI < file +sets +.B $0 +to the next record from +.IR file . +.B getline +.I x +sets variable +.I x +instead. +Finally, +.IB cmd " |getline" +pipes the output of +.I cmd +into +.BR getline ; +each call of +.B getline +returns the next line of output from +.IR cmd . +In all cases, +.B getline +returns 1 for a successful input, +0 for end of file, and \-1 for an error. +.PP +Additional functions may be defined +(at the position of a pattern-action statement) thus: +.IP +\fBfunction \fIfoo\fB(\fIa\fB, \fIb\fB, \fIc\fB) +{ \fI...\fB; return \fIx\fB }\fR +.PP +or: +.IP +\fBfunc \fIfoo\fB(\fIa\fB, \fIb\fB, \fIc\fB) +{ \fI...\fB; return \fIx\fB }\fR +.PP +Parameters are passed by value if scalar and by reference if array name; +functions may be called recursively. +Parameters are local to the function; all other variables are global. +Thus local variables may be created by providing excess parameters in +the function definition. +.PP +Patterns are arbitrary Boolean combinations +(with +.BR "! || &&" ) +of regular expressions and +relational expressions. 
+Regular expressions are +full regular expressions with +.B /usr/5bin/nawk +and +extended regular expressions with +.BR /usr/5bin/s42/awk , +.BR /usr/5bin/posix/awk , +and +.BR /usr/5bin/posix2001/awk ; +both are as described in +.IR egrep (1). +Isolated regular expressions +in a pattern apply to the entire line. +Regular expressions may also occur in +relational expressions, using the operators +.BR ~ +and +.BR !~ . +.BI / re / +is a constant regular expression; +any string (constant or variable) may be used +as a regular expression, except in the position of an isolated regular expression +in a pattern. +For +.BR /usr/5bin/posix2001/awk , +regular expressions may be part of arithmetic expressions. +.PP +A pattern may consist of two patterns separated by a comma; +in this case, the action is performed for all lines +from an occurrence of the first pattern +through an occurrence of the second. +.PP +A relational expression is one of the following: +.IP +.I expression matchop regular-expression +.br +.I expression relop expression +.br +.IB expression " in " array-name +.br +.BI ( expr , expr,... ") in " array-name +.PP +where a relop is any of the six relational operators in C, +and a matchop is either +.B ~ +(matches) +or +.B !~ +(does not match). +A conditional is an arithmetic expression, +a relational expression, +or a Boolean combination +of these. +.PP +The special patterns +.B BEGIN +and +.B END +may be used to capture control before the first input line is read +and after the last. +.B BEGIN +and +.B END +do not combine with other patterns. 
+.PP +Variable names with special meanings: +.\".TF FILENAME +.TP 10 +.B ARGC +argument count, assignable +.TP 10 +.B ARGV +argument array, assignable; +non-null members are taken as filenames +.TP 10 +.B CONVFMT +.RB ( /usr/5bin/s42/awk , +.BR /usr/5bin/posix2001/awk , +and +.B /usr/5bin/posix/awk +only) +conversion format used when converting numbers +(default +.BR "%.6g" ) +.TP 10 +.B ENVIRON +array of environment variables; subscripts are names. +.TP 10 +.B FILENAME +the name of the current input file +.TP 10 +.B FNR +ordinal number of the current record in the current file +.TP 10 +.B FS +regular expression used to separate fields; also settable +by option +.BI \-F fs. +.TP 10 +.BR NF +number of fields in the current record +.TP 10 +.B NR +ordinal number of the current record +.TP 10 +.B OFMT +output format for numbers (default +.BR "%.6g" ) +.TP 10 +.B OFS +output field separator (default blank) +.TP 10 +.B ORS +output record separator (default newline) +.TP 10 +.B RS +input record separator (default newline) +.TP 10 +.B SUBSEP +separates multiple subscripts (default 034) +.PD +.SH EXAMPLES +.TP +.nf +length($0) > 72 +.br +.fi +Print lines longer than 72 characters. +.TP +.nf +{ print $2, $1 } +.br +.fi +Print first two fields in opposite order. +.PP +.nf +BEGIN { FS = ",[ \et]*|[ \et]+" } + { print $2, $1 } +.br +.fi +.ns +.IP +Same, with input fields separated by comma and/or blanks and tabs. +.PP +.nf + { s += $1 } +END { print "sum is", s, " average is", s/NR } +.fi +.br +.ns +.IP +Add up first column, print sum and average. +.TP +.nf +/start/, /stop/ +.br +.fi +Print all lines between start/stop pairs. +.PP +.nf +BEGIN { # Simulate echo(1) + for (i = 1; i < ARGC; i++) printf "%s ", ARGV[i] + printf "\en" + exit } +.fi +.br +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). 
+.TP +.B LC_COLLATE +Affects the collation order for range expressions, +equivalence classes, and collation symbols +in regular expressions +as well as string comparison. +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters, +the availability and composition of character classes +in regular expressions, +and the case mapping for the toupper() and tolower() functions. +.TP +.B LC_NUMERIC +Determines the radix character used when interpreting numeric input, +performing conversions between numeric and string values +and formatting numeric output. +Regardless of locale, the period character +(the decimal-point character of the C locale) +is the decimal-point character recognized in processing awk programs. +.SH SEE ALSO +egrep(1), +lex(1), +oawk(1), +sed(1), +printf(3), +locale(7) +.br +A. V. Aho, B. W. Kernighan, P. J. Weinberger, +.I +The AWK Programming Language, +Addison-Wesley, 1988. ISBN 0-201-07981-X +.SH NOTES +There are no explicit conversions between numbers and strings. +To force an expression to be treated as a number add 0 to it; +to force it to be treated as a string concatenate +\&\fB""\fR to it. +.\".sp +.\"The scope rules for variables in functions are a botch; +.\"the syntax is worse. +.PP +The LC_COLLATE variable currently has no effect in regular expressions. +Ranges in bracket expressions are ordered +as byte values in single-byte locales +and as wide character values in multibyte locales; +equivalence classes match the given character only, +and multi-character collating elements are not available. diff --git a/nawk/parse.c b/nawk/parse.c @@ -0,0 +1,248 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)parse.c 1.7 (gritter) 12/4/04> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/parse.c /main/uw7_nj/1 */ +/* from RCS Header: parse.c 1.2 91/06/25 */ + +#define DEBUG +#include <stdio.h> +#include <string.h> +#include <pfmt.h> +#include "awk.h" +#include "y.tab.h" + +Node *nodealloc(int n) +{ + register Node *x; + x = (Node *) malloc(sizeof(Node) + (n-1)*sizeof(Node *)); + if (x == NULL) + error(MM_ERROR, outofspace, "nodealloc"); + x->nnext = NULL; + x->lineno = lineno; + return(x); +} + +Node *exptostat(Node *a) +{ + a->ntype = NSTAT; + return(a); +} + +Node *node1(int a, Node *b) +{ + register Node *x; + x = nodealloc(1); + x->nobj = a; + x->narg[0]=b; + return(x); +} + +Node *node2(int a, Node *b, Node *c) +{ + register Node *x; + x = nodealloc(2); + x->nobj = a; + x->narg[0] = b; + x->narg[1] = c; + return(x); +} + +Node *node3(int a, Node *b, Node *c, Node *d) +{ + register Node *x; + x = nodealloc(3); + x->nobj = a; + x->narg[0] = b; + x->narg[1] = c; + x->narg[2] = d; + return(x); +} + +Node *node4(int a, Node *b, Node *c, Node *d, Node *e) +{ + register Node *x; + x = nodealloc(4); + x->nobj = a; + x->narg[0] = b; + x->narg[1] = c; + x->narg[2] = d; + x->narg[3] = e; + return(x); +} + +Node *stat3(int a, Node *b, Node *c, Node *d) +{ + register Node *x; + x = node3(a,b,c,d); + 
x->ntype = NSTAT; + return(x); +} + +Node *op2(int a, Node *b, Node *c) +{ + register Node *x; + x = node2(a,b,c); + x->ntype = NEXPR; + return(x); +} + +Node *op1(int a, Node *b) +{ + register Node *x; + x = node1(a,b); + x->ntype = NEXPR; + return(x); +} + +Node *stat1(int a, Node *b) +{ + register Node *x; + x = node1(a,b); + x->ntype = NSTAT; + return(x); +} + +Node *op3(int a, Node *b, Node *c, Node *d) +{ + register Node *x; + x = node3(a,b,c,d); + x->ntype = NEXPR; + return(x); +} + +Node *op4(int a, Node *b, Node *c, Node *d, Node *e) +{ + register Node *x; + x = node4(a,b,c,d,e); + x->ntype = NEXPR; + return(x); +} + +Node *stat2(int a, Node *b, Node *c) +{ + register Node *x; + x = node2(a,b,c); + x->ntype = NSTAT; + return(x); +} + +Node *stat4(int a, Node *b, Node *c, Node *d, Node *e) +{ + register Node *x; + x = node4(a,b,c,d,e); + x->ntype = NSTAT; + return(x); +} + +Node *valtonode(Cell *a, int b) +{ + register Node *x; + + a->ctype = OCELL; + a->csub = b; + x = node1(0, (Node *) a); + x->ntype = NVALUE; + return(x); +} + +Node *rectonode(void) +{ + /* return valtonode(lookup("$0", symtab), CFLD); */ + return valtonode(recloc, CFLD); +} + +Node *makearr(Node *p) +{ + Cell *cp; + + if (isvalue(p)) { + cp = (Cell *) (p->narg[0]); + if (isfunc(cp)) + vyyerror(":38:%s is a function, not an array", + cp->nval); + else if (!isarr(cp)) { + xfree(cp->sval); + cp->sval = (unsigned char *) makesymtab(NSYMTAB); + cp->tval = ARR; + } + } + return p; +} + +Node *pa2stat(Node *a,Node *b,Node *c) +{ + register Node *x; + x = node4(PASTAT2, a, b, c, (Node *) paircnt); + paircnt++; + x->ntype = NSTAT; + return(x); +} + +Node *linkum(Node *a,Node *b) +{ + register Node *c; + + if (errorflag) /* don't link things that are wrong */ + return a; + if (a == NULL) return(b); + else if (b == NULL) return(a); + for (c = a; c->nnext != NULL; c = c->nnext) + ; + c->nnext = b; + return(a); +} + +void defn(Cell *v, /* turn on FCN bit in definition */ + Node *vl, Node *st) /* 
arglist, body of function */ +{ + Node *p; + int n; + + if (isarr(v)) { + vyyerror(":39:`%s' is an array name and a function name", + v->nval); + return; + } + v->tval = FCN; + v->sval = (unsigned char *) st; + n = 0; /* count arguments */ + for (p = vl; p; p = p->nnext) + n++; + v->fval = n; + dprintf( ("defining func %s (%d args)\n", v->nval, n) ); +} + +int isarg(const char *s) /* is s in argument list for current function? */ +{ + extern Node *arglist; + Node *p = arglist; + int n; + + for (n = 0; p != 0; p = p->nnext, n++) + if (strcmp((char *)((Cell *)(p->narg[0]))->nval, s) == 0) + return n; + return -1; +} diff --git a/nawk/run.c b/nawk/run.c @@ -0,0 +1,1962 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)run.c 1.33 (gritter) 12/25/06> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + + +/* from unixsrc:usr/src/common/cmd/awk/run.c /main/uw7_nj/1 */ +/* from RCS Header: run.c 1.3 91/08/12 */ + +#define tempfree(x,s) if (istemp(x)) tfree(x,s); else + +/* #define execute(p) (isvalue(p) ? 
(Cell *)((p)->narg[0]) : r_execute(p)) */ +#define execute(p) r_execute((Node *)p) + +#define DEBUG +#include <math.h> +#include <stdio.h> +#include <ctype.h> +#include <setjmp.h> +#include <pfmt.h> +#include <string.h> +#include <errno.h> +#include <wctype.h> +#include <inttypes.h> +#include <time.h> +#include "awk.h" +#include "y.tab.h" + +jmp_buf env; + +#define getfval(p) (((p)->tval & (ARR|FLD|REC|NUM)) == NUM ? (p)->fval : r_getfval(p)) +#define getsval(p) (((p)->tval & (ARR|FLD|REC|STR)) == STR ? (p)->sval : r_getsval(p)) + +static void tfree(register Cell *a, char *s); + +#define PA2NUM 29 +int pairstack[PA2NUM]; +long paircnt; +Node *winner = NULL; +Cell *tmps; + +static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM }; +Cell *true = &truecell; +static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM }; +Cell *false = &falsecell; +static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM }; +Cell *jbreak = &breakcell; +static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM }; +Cell *jcont = &contcell; +static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM }; +Cell *jnext = &nextcell; +static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM }; +Cell *jexit = &exitcell; +static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM }; +Cell *jret = &retcell; +static Cell tempcell ={ OCELL, CTEMP, 0, 0, 0.0, NUM }; + +Node *curnode = NULL; /* the node being executed, for debugging */ + +static const char + restoobig[] = ":40:%s() result %.20s too big", + notarray[] = ":41:%s is not an array", + ioerror[] = ":42:I/O error occurred on %s"; +const char + illstat[] = ":43:Illegal statement"; + +extern const char readvofid[], readvof[], badopen[]; + +static int growsprintf(unsigned char **, unsigned char **, + int *, const char *, ...); +static void growbuf(unsigned char **buf, int *bufsize, int incr, + unsigned char **ptr, const char *fn); +static void closeall(void); +static void caseconv(unsigned char *s, wint_t (*conv)(wint_t)); + +/* run: execute the tree rooted at a, then call closeall(); always returns 0 */ int run(Node *a) +{ + execute(a); + closeall(); + 
return 0; +} + +/* r_execute: execute the nnext-linked list of nodes starting at u and return the resulting Cell. A NULL u yields the true cell. A value node returns its own cell; otherwise the node is dispatched through proctab[nobj-FIRSTTOKEN]. In both cases fldbld() or recbld() is called first when the cell is flagged FLD or REC and donefld or donerec is still clear. An expression node returns its result at once; for statements the walk continues with the next node unless the result is a jump or the list ends, and each intermediate result is released with tempfree(). curnode tracks the node being executed. */ Cell *r_execute(Node *u) +{ + register Cell *(*proc)(Node **, int); + register Cell *x; + register Node *a; + + if (u == NULL) + return(true); + for (a = u; ; a = a->nnext) { + curnode = a; + if (isvalue(a)) { + x = (Cell *) (a->narg[0]); + if ((x->tval & FLD) && !donefld) + fldbld(); + else if ((x->tval & REC) && !donerec) + recbld(); + return(x); + } + if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ + error(MM_ERROR, illstat); + proc = proctab[a->nobj-FIRSTTOKEN]; + x = (*proc)(a->narg, a->nobj); + if ((x->tval & FLD) && !donefld) + fldbld(); + else if ((x->tval & REC) && !donerec) + recbld(); + if (isexpr(a)) + return(x); + /* a statement, goto next statement */ + if (isjump(x)) + return(x); + if (a->nnext == (Node *)NULL) + return(x); + tempfree(x, "execute"); + } +} + + +/* program: run the three parts of a program: a[0] is executed once first (BEGIN), a[1] once per record returned by getrec() while a[1] or a[2] is set, and a[2] once afterwards (END). setjmp(env) is armed before BEGIN and again before END: a non-zero return from the first goes straight to the END part, a non-zero return from the second goes straight to the final return. An exit result from BEGIN returns immediately; an exit result in the record loop leaves the loop; a continue result from END goes back to the record loop. Other jump results from BEGIN, and break or next results from END, are reported through error(MM_ERROR, ...). Always returns the true cell. n is not used. */ Cell *program(register Node **a, int n) +{ + register Cell *x = 0; + + if (setjmp(env) != 0) + goto ex; + if (a[0]) { /* BEGIN */ + x = execute(a[0]); + if (isexit(x)) + return(true); + if (isjump(x)) + error(MM_ERROR, + ":44:Illegal break, continue or next from BEGIN"); + if(x != 0) { tempfree(x, ""); } + } + loop: + if (a[1] || a[2]) + while (getrec(&record, &recsize) > 0) { + x = execute(a[1]); + if (isexit(x)) + break; + if(x != 0) { tempfree(x, ""); } + } + ex: + if (setjmp(env) != 0) + goto ex1; + if (a[2]) { /* END */ + x = execute(a[2]); + if (iscont(x)) /* read some more */ + goto loop; + if (isbreak(x) || isnext(x)) + error(MM_ERROR, ":45:Illegal break or next from END"); + if(x != 0) { tempfree(x, ""); } + } + ex1: + return(true); +} + +struct Frame { + int nargs; /* number of arguments in this call */ + Cell *fcncell; /* pointer to Cell for function */ + Cell **args; /* pointer to array of arguments after execute */ + Cell *retval; /* return value */ +}; + +#define NARGS 30 + +struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ +int nframe = 0; /* number of frames allocated */ +struct Frame *fp = NULL; /* frame pointer. 
bottom level unused */ + +Cell *call(Node **a, int n) +{ + static Cell newcopycell = { OCELL, CCOPY, 0, (unsigned char *) "", 0.0, NUM|STR|DONTFREE }; + int i, ncall, ndef; + Node *x; + Cell *args[NARGS], *oargs[NARGS], *y, *z, *fcn; + unsigned char *s; + + fcn = execute(a[0]); /* the function itself */ + s = fcn->nval; + if (!isfunc(fcn)) + error(MM_ERROR, ":46:Calling undefined function %s", s); + if (frame == NULL) { + fp = frame = (struct Frame *) calloc(nframe += 100, sizeof(struct Frame)); + if (frame == NULL) + error(MM_ERROR, ":47:Out of space for stack frames calling %s", s); + } + for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ + ncall++; + ndef = (int) fcn->fval; /* args in defn */ + dprintf( ("calling %s, %d args (%d in defn), fp=%ld\n", s, + ncall, ndef, (long)(fp-frame)) ); + if (ncall > ndef) { + if (ncall == 1) + error(MM_WARNING, ":48:Function %s called with 1 arg, uses only %d", + s, ndef); + else + error(MM_WARNING, ":49:Function %s called with %d args, uses only %d", + s, ncall, ndef); + } + if (ncall + ndef > NARGS) + error(MM_ERROR, ":50:Function %s has %d arguments, limit %d", + s, ncall+ndef, NARGS); + for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ + dprintf( ("evaluate args[%d], fp=%ld:\n", i, + (long)(fp-frame)) ); + y = execute(x); + oargs[i] = y; + dprintf( ("args[%d]: %s %f <%s>, t=%o\n", + i, y->nval, y->fval, isarr(y) ? 
+ "(array)" : (char*) y->sval, y->tval) ); + if (isfunc(y)) + error(MM_ERROR, ":51:Cannot use function %s as argument in %s", + y->nval, s); + if (isarr(y)) + args[i] = y; /* arrays by ref */ + else + args[i] = copycell(y); + tempfree(y, "callargs"); + } + for ( ; i < ndef; i++) { /* add null args for ones not provided */ + args[i] = gettemp("nullargs"); + *args[i] = newcopycell; + } + fp++; /* now ok to up frame */ + if (fp >= frame + nframe) { + int dfp = fp - frame; /* old index */ + frame = (struct Frame *) + realloc(frame, (nframe += 100) * sizeof(struct Frame)); + if (frame == NULL) + error(MM_ERROR, ":52:Out of space for stack frames in %s", s); + fp = frame + dfp; + } + fp->fcncell = fcn; + fp->args = args; + fp->nargs = ndef; /* number defined with (excess are locals) */ + fp->retval = gettemp("retval"); + + dprintf( ("start exec of %s, fp=%ld\n", s, (long)(fp-frame)) ); + y = execute((Node *)(fcn->sval)); /* execute body */ + dprintf( ("finished exec of %s, fp=%ld\n", s, (long)(fp-frame)) ); + + for (i = 0; i < ndef; i++) { + Cell *t = fp->args[i]; + if (isarr(t)) { + if (t->csub == CCOPY) { + if (i >= ncall) { + freesymtab(t); + t->csub = CTEMP; + } else { + oargs[i]->tval = t->tval; + oargs[i]->tval &= ~(STR|NUM|DONTFREE); + oargs[i]->sval = t->sval; + tempfree(t, "oargsarr"); + } + } + } else if (t != y) { /* kludge to prevent freeing twice */ + t->csub = CTEMP; + tempfree(t, "fp->args"); + } + } + tempfree(fcn, "call.fcn"); + if (isexit(y) || isnext(y)) + return y; + tempfree(y, "fcn ret"); /* this can free twice! */ + z = fp->retval; /* return value */ + dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), + getsval(z), z->tval) ); + fp--; + return(z); +} + +Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ +{ + Cell *y; + + y = gettemp("copycell"); + y->csub = CCOPY; /* prevents freeing until call is over */ + y->nval = x->nval; + y->sval = x->sval ? 
tostring(x->sval) : NULL; + y->fval = x->fval; + y->tval = x->tval & ~(CON|FLD|REC|DONTFREE); /* copy is not constant or field */ + /* is DONTFREE right? */ + return y; +} + +/*ARGSUSED2*/ +Cell *arg(Node **a, int nnn) +{ + int n; + + n = (intptr_t) a[0]; /* argument number, counting from 0 */ + dprintf( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) ); + if (n+1 > fp->nargs) + error(MM_ERROR, ":53:Argument #%d of function %s was not supplied", + n+1, fp->fcncell->nval); + return fp->args[n]; +} + +static int in_loop = 0; /* Flag : are we in a [while|do|for] loop ? */ + +Cell *jump(Node **a, int n) +{ + register Cell *y; + + switch (n) { + case EXIT: + if (a[0] != NULL) { + y = execute(a[0]); + errorflag = getfval(y); + tempfree(y, ""); + } + longjmp(env, 1); + case RETURN: + if (a[0] != NULL) { + y = execute(a[0]); + if ((y->tval & (STR|NUM)) == (STR|NUM)) { + setsval(fp->retval, getsval(y)); + fp->retval->fval = getfval(y); + fp->retval->tval |= NUM; + } + else if (y->tval & STR) + setsval(fp->retval, getsval(y)); + else if (y->tval & NUM) + setfval(fp->retval, getfval(y)); + tempfree(y, ""); + } + return(jret); + case NEXT: + return(jnext); + case BREAK: + if (posix && !in_loop) + error(MM_ERROR, ":101:break-statement outside of a loop"); + return(jbreak); + case CONTINUE: + if (posix && !in_loop) + error(MM_ERROR, ":102:continue-statement outside of a loop"); + return(jcont); + default: /* can't happen */ + error(MM_ERROR, ":54:Illegal jump type %d", n); + /*NOTREACHED*/ + return 0; + } +} + +Cell *getline(Node **a, int n) +{ + /* a[0] is variable, a[1] is operator, a[2] is filename */ + register Cell *r, *x; + unsigned char *buf = NULL; + int bufsize = 0; + FILE *fp; + + fflush(stdout); /* in case someone is waiting for a prompt */ + r = gettemp(""); + if (a[1] != NULL) { /* getline < file */ + x = execute(a[2]); /* filename */ + if ((intptr_t) a[1] == '|') /* input pipe */ + a[1] = (Node *) LE; /* arbitrary flag */ + fp = openfile((intptr_t) a[1], getsval(x)); + 
tempfree(x, ""); + if (fp == NULL) + n = -1; + else + n = readrec(&buf, &bufsize, fp); + if (n <= 0) { + ; + } else if (a[0] != NULL) { /* getline var <file */ + setsval(execute(a[0]), buf); + } else { /* getline <file */ + makerec(buf, bufsize); + } + } else { /* bare getline; use current input */ + if (a[0] == NULL) /* getline */ + n = getrec(&record, &recsize); + else { /* getline var */ + n = getrec(&buf, &bufsize); + setsval(execute(a[0]), buf); + } + } + setfval(r, (Awkfloat) n); + if (bufsize) + free(buf); + return r; +} + +Cell *getnf(register Node **a, int n) +{ + if (donefld == 0) + fldbld(); + return (Cell *) a[0]; +} + +Cell *array(register Node **a, int n) +{ + register Cell *x, *y, *z; + register unsigned char *s; + register Node *np; + unsigned char *buf = NULL; + int bufsz = 0, subseplen, len = 1, l; + + x = execute(a[0]); /* Cell* for symbol table */ + subseplen = strlen((char *)*SUBSEP); + growbuf(&buf, &bufsz, CHUNK, NULL, "array"); + buf[0] = 0; + for (np = a[1]; np; np = np->nnext) { + y = execute(np); /* subscript */ + s = getsval(y); + len += (l = strlen((char *)s) + subseplen); + if (len >= bufsz) + growbuf(&buf, &bufsz, l, NULL, "array"); + strcat((char*)buf, (char*)s); + if (np->nnext) + strcat((char*)buf, (char*)*SUBSEP); + tempfree(y, ""); + } + if (!isarr(x)) { + dprintf( ("making %s into an array\n", x->nval) ); + if (freeable(x)) + xfree(x->sval); + x->tval &= ~(STR|NUM|DONTFREE); + x->tval |= ARR; + x->sval = (unsigned char *) makesymtab(NSYMTAB); + } + z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); + z->ctype = OCELL; + z->csub = CVAR; + tempfree(x, ""); + free(buf); + return(z); +} + +Cell *delete(Node **a, int n) +{ + Cell *x, *y; + Node *np; + unsigned char *buf = NULL, *s; + int bufsz = 0, subseplen, len = 1, l; + + x = execute(a[0]); /* Cell* for symbol table */ + if (!isarr(x)) + return true; + subseplen = strlen((char *)*SUBSEP); + growbuf(&buf, &bufsz, CHUNK, NULL, "delete"); + buf[0] = 0; + for (np = a[1]; np; np 
= np->nnext) { + y = execute(np); /* subscript */ + s = getsval(y); + len += (l = strlen((char *)s) + subseplen); + if (len >= bufsz) + growbuf(&buf, &bufsz, l, NULL, "delete"); + strcat((char*)buf, (char*)s); + if (np->nnext) + strcat((char*)buf, (char*)*SUBSEP); + tempfree(y, ""); + } + freeelem(x, buf); + tempfree(x, ""); + free(buf); + return true; +} + +Cell *intest(Node **a, int n) +{ + register Cell *x, *ap, *k; + Node *p; + unsigned char *s; + unsigned char *buf = NULL; + int bufsz = 0, subseplen, len = 1, l; + + ap = execute(a[1]); /* array name */ + if (!isarr(ap)) + error(MM_ERROR, notarray, ap->nval); + subseplen = strlen((char *)*SUBSEP); + growbuf(&buf, &bufsz, CHUNK, NULL, "intest"); + buf[0] = 0; + for (p = a[0]; p; p = p->nnext) { + x = execute(p); /* expr */ + s = getsval(x); + len += (l = strlen((char *)s) + subseplen); + if (len >= bufsz) + growbuf(&buf, &bufsz, l, NULL, "array"); + strcat((char *)buf, (char*)s); + tempfree(x, ""); + if (p->nnext) + strcat((char *)buf, (char*)*SUBSEP); + } + k = lookup(buf, (Array *) ap->sval); + tempfree(ap, ""); + free(buf); + if (k == NULL) + return(false); + else + return(true); +} + + +Cell *matchop(Node **a, int n) +{ + register Cell *x, *y; + register unsigned char *s, *t; + register int i; + fa *pfa; + int (*mf)(void *, unsigned char *) = match, mode = 0; + + if (n == MATCHFCN) { + mf = pmatch; + mode = 1; + } + x = execute(a[1]); + s = getsval(x); + if (a[0] == 0) + i = (*mf)(a[2], s); + else { + y = execute(a[2]); + t = getsval(y); + pfa = makedfa(t, mode); + i = (*mf)(pfa, s); + tempfree(y, ""); + } + tempfree(x, ""); + if (n == MATCHFCN) { + int start, length; + if (patlen < 0) { + start = 0; + length = patlen; + } else { + start = chrdist(s, patbeg); + length = chrdist(patbeg, &patbeg[patlen - 1]); + } + setfval(rstartloc, (Awkfloat) start); + setfval(rlengthloc, (Awkfloat) length); + x = gettemp(""); + x->tval = NUM; + x->fval = start; + return x; + } else if ((n == MATCH && i == 1) || (n == 
NOTMATCH && i == 0)) + return(true); + else + return(false); +} + + +Cell *boolop(Node **a, int n) +{ + register Cell *x, *y; + register int i; + + x = execute(a[0]); + i = istrue(x); + tempfree(x, ""); + switch (n) { + case BOR: + if (i) return(true); + y = execute(a[1]); + i = istrue(y); + tempfree(y, ""); + if (i) return(true); + else return(false); + case AND: + if ( !i ) return(false); + y = execute(a[1]); + i = istrue(y); + tempfree(y, ""); + if (i) return(true); + else return(false); + case NOT: + if (i) return(false); + else return(true); + default: /* can't happen */ + error(MM_ERROR, ":55:Unknown boolean operator %d", n); + } + /*NOTREACHED*/ + return 0; +} + +Cell *relop(Node **a, int n) +{ + register int i; + register Cell *x, *y; + Awkfloat j; + + x = execute(a[0]); + y = execute(a[1]); + if (x->tval&NUM && y->tval&NUM) { + j = x->fval - y->fval; + i = j<0? -1: (j>0? 1: 0); + } else { + i = strcoll((char*)getsval(x), (char*)getsval(y)); + } + tempfree(x, ""); + tempfree(y, ""); + switch (n) { + case LT: if (i<0) return(true); + else return(false); + case LE: if (i<=0) return(true); + else return(false); + case NE: if (i!=0) return(true); + else return(false); + case EQ: if (i == 0) return(true); + else return(false); + case GE: if (i>=0) return(true); + else return(false); + case GT: if (i>0) return(true); + else return(false); + default: /* can't happen */ + error(MM_ERROR, ":56:Unknown relational operator %d", n); + } + /*NOTREACHED*/ + return 0; +} + +static void tfree(register Cell *a, char *s) +{ + if (dbg>1) printf("## tfree %.8s %06lo %s\n", + s, (long)a, a->sval ? 
a->sval : (unsigned char *)""); + if (freeable(a)) + xfree(a->sval); + if (a == tmps) + error(MM_ERROR, ":57:Tempcell list is curdled"); + a->cnext = tmps; + tmps = a; +} + +Cell *gettemp(const char *s) +{ int i; + register Cell *x; + + if (!tmps) { + tmps = (Cell *) calloc(100, sizeof(Cell)); + if (!tmps) + error(MM_ERROR, ":58:No space for temporaries"); + for(i = 1; i < 100; i++) + tmps[i-1].cnext = &tmps[i]; + tmps[i-1].cnext = 0; + } + x = tmps; + tmps = x->cnext; + *x = tempcell; + if (dbg>1) printf("## gtemp %.8s %06lo\n", s, (long)x); + return(x); +} + +Cell *indirect(Node **a, int n) +{ + register Cell *x; + register int m; + register unsigned char *s; + + x = execute(a[0]); + m = getfval(x); + if (m == 0 && !is2number(s = getsval(x), 0)) /* suspicion! */ + error(MM_ERROR, ":59:Illegal field $(%s)", s); + tempfree(x, ""); + x = fieldadr(m); + x->ctype = OCELL; + x->csub = CFLD; + return(x); +} + +Cell *substr(Node **a, int nnn) +{ + register int k, m, n; + wchar_t wc; + register unsigned char *s, *sp, *sq; + int temp; + register Cell *x, *y, *z = 0; + + x = execute(a[0]); + y = execute(a[1]); + if (a[2] != 0) + z = execute(a[2]); + s = getsval(x); + k = strlen((char*)s) + 1; + if (k <= 1) { + tempfree(x, ""); + tempfree(y, ""); + if (a[2] != 0) { + tempfree(z, ""); + } + x = gettemp(""); + setsval(x, (unsigned char *)""); + return(x); + } + m = getfval(y); + if (m <= 0) + m = 1; + else if (m > k) + m = k; + tempfree(y, ""); + if (a[2] != 0) { + n = getfval(z); + tempfree(z, ""); + } else + n = k - 1; + if (n < 0) + n = 0; + else if (n > k - m) + n = k - m; + dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) ); + if (mb_cur_max > 1) { + for (sp = s; m > 1 && *sp; m--) { + next(wc, sp, k); + sp += k; + } + m = sp - s + 1; + for (sq = sp ; n > 0 && *sq; n--) { + next(wc, sq, k); + sq += k; + } + n = sq - sp; + dprintf( ("substr: multibyte: m=%d, n=%d, s=%s\n", m, n, s) ); + } + y = gettemp(""); + temp = s[n+m-1]; /* with thanks to John Linderman */ + s[n+m-1] 
= '\0'; + setsval(y, s + m - 1); + s[n+m-1] = temp; + tempfree(x, ""); + return(y); +} + +Cell *sindex(Node **a, int nnn) +{ + register Cell *x, *y, *z; + register unsigned char *s1, *s2, *p1, *p2, *q; + int n, nq, n2; + wchar_t wc, wq, w2; + Awkfloat v = 0.0; + + x = execute(a[0]); + s1 = getsval(x); + y = execute(a[1]); + s2 = getsval(y); + + z = gettemp(""); + for (p1 = s1; next(wc, p1, n), wc != '\0'; p1 += n) { + for (q = p1, p2 = s2; + next(wq, q, nq), + next(w2, p2, n2), + w2 != '\0' && wq == w2; + q += nq, p2 += n2) + ; + if (w2 == '\0') { + v = (Awkfloat) chrdist(s1, p1); + break; + } + } + tempfree(x, ""); + tempfree(y, ""); + setfval(z, v); + return(z); +} + + +int format(unsigned char **buf, int *bufsize, const unsigned char *s, Node *a) +{ + unsigned char *fmt = NULL; + int fmtsz = 0; + unsigned char *p, *t; + const unsigned char *os; + register Cell *x; + int flag = 0; + + os = s; + fmt = malloc(fmtsz = CHUNK); + if (*bufsize == 0) + *buf = malloc(*bufsize = CHUNK); + if (fmt == NULL || *buf == NULL) + error(MM_ERROR, outofspace, "format"); + p = *buf; + while (*s) { + if (p >= &(*buf)[*bufsize]) + growbuf(buf, bufsize, CHUNK, &p, "format"); + if (*s != '%') { + *p++ = *s++; + continue; + } + if (*(s+1) == '%') { + *p++ = '%'; + s += 2; + continue; + } + for (t=fmt; (*t++ = *s) != '\0'; s++) { + if (t >= &fmt[fmtsz]) + growbuf(&fmt, &fmtsz, CHUNK, &t, "format"); + if (isalpha(*s) && *s != 'l' && *s != 'h' && *s != 'L') + break; /* the ansi panoply */ + if (*s == '*') { + x = execute(a); + a = a->nnext; + t--; + growsprintf(&fmt, &t, &fmtsz, "%d", (int) getfval(x)); + tempfree(x, ""); + } + } + *t = '\0'; + switch (*s) { + case 'a': case 'A': + case 'e': case 'E': + case 'f': case 'F': + case 'g': case 'G': + flag = 1; + break; + case 'd': case 'i': + flag = 2; + if(*(s-1) == 'l') break; + *(t-1) = 'l'; + *t = 'd'; + *++t = '\0'; + break; + case 'o': case 'x': case 'X': case 'u': + flag = *(s-1) == 'l' ? 
12 : 13; + break; + case 's': + /* + * Note: If MB_CUR_MAX > 1, the precision is in + * bytes, not characters. This doesn't make much + * sense in awk context, but it seems to match + * what POSIX demands. + */ + flag = 4; + break; + case 'c': + if (mb_cur_max > 1) { + *(t-1) = 'l'; + *t = 'c'; + *++t = '\0'; + flag = 6; + } else + flag = 5; + break; + default: + flag = 0; + break; + } + if (flag == 0) { + growsprintf(buf, &p, bufsize, "%s", fmt); + continue; + } + if (a == NULL) + error(MM_ERROR, ":61:Not enough args in printf(%s)", + os); + x = execute(a); + a = a->nnext; + switch (flag) { + case 1: growsprintf(buf, &p, bufsize, (char *)fmt, getfval(x)); + break; + case 2: growsprintf(buf, &p, bufsize, (char *)fmt, + (long) getfval(x)); + break; + case 3: growsprintf(buf, &p, bufsize, (char *)fmt, + (int) getfval(x)); + break; + case 12:growsprintf(buf, &p, bufsize, (char *)fmt, + (unsigned long) getfval(x)); + break; + case 13:growsprintf(buf, &p, bufsize, (char *)fmt, + (unsigned int) getfval(x)); + break; + case 4: growsprintf(buf, &p, bufsize, (char *)fmt, getsval(x)); + break; + case 5: isnum(x) ? growsprintf(buf, &p, bufsize, (char *)fmt, + (int) getfval(x)) + : growsprintf(buf, &p, bufsize, (char *)fmt, + getsval(x)[0]); + break; + case 6: isnum(x) ? growsprintf(buf, &p, bufsize, (char *)fmt, + (wint_t) getfval(x)) + : growsprintf(buf, &p, bufsize, (char *)fmt, + (wint_t) getsval(x)[0]); + break; + } + tempfree(x, ""); + s++; + } + *p = '\0'; + for ( ; a; a = a->nnext) /* evaluate any remaining args */ + execute(a); + xfree(fmt); + return 0; +} + +Cell *awsprintf(Node **a, int n) +{ + register Cell *x; + register Node *y; + unsigned char *buf = NULL; + int bufsize = 0; + + y = a[0]->nnext; + x = execute(a[0]); + if (format(&buf, &bufsize, getsval(x), y) == -1) + error(MM_ERROR, ":62:sprintf string %.40s ... too long", buf); + tempfree(x, ""); + x = gettemp(""); + x->sval = /*tostring(buf);*/ buf ? 
buf : tostring(""); + x->tval = STR; + return(x); +} + +Cell *aprintf(Node **a, int n) +{ + FILE *fp; + register Cell *x; + register Node *y; + unsigned char *buf = NULL; + int bufsize = 0; + + y = a[0]->nnext; + x = execute(a[0]); + if (format(&buf, &bufsize, getsval(x), y) == -1) + error(MM_ERROR, ":63:printf string %.40s ... too long", buf); + tempfree(x, ""); + if (buf) { + if (a[1] == NULL) + fputs((char *)buf, stdout); + else { + fp = redirect((intptr_t)a[1], a[2]); + fputs((char *)buf, fp); + fflush(fp); + } + free(buf); + } + return(true); +} + +Cell *arith(Node **a, int n) +{ + Awkfloat i, j = 0; + double v; + register Cell *x, *y, *z; + + x = execute(a[0]); + i = getfval(x); + tempfree(x, ""); + if (n != UMINUS) { + y = execute(a[1]); + j = getfval(y); + tempfree(y, ""); + } + z = gettemp(""); + switch (n) { + case ADD: + i += j; + break; + case MINUS: + i -= j; + break; + case MULT: + i *= j; + break; + case DIVIDE: + if (j == 0) + error(MM_ERROR, ":64:Division by zero"); + i /= j; + break; + case MOD: + if (j == 0) + error(MM_ERROR, ":65:Division by zero in mod"); + modf(i/j, &v); + i = i - j * v; + break; + case UMINUS: + i = -i; + break; + case POWER: + if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ + i = ipow(i, (int) j); + else + i = errcheck(pow(i, j), (unsigned char *)"pow"); + break; + default: /* can't happen */ + error(MM_ERROR, ":66:Illegal arithmetic operator %d", n); + } + setfval(z, i); + return(z); +} + +double ipow(double x, int n) +{ + double v; + + if (n <= 0) + return 1; + v = ipow(x, n/2); + if (n % 2 == 0) + return v * v; + else + return x * v * v; +} + +Cell *incrdecr(Node **a, int n) +{ + register Cell *x, *z; + register int k; + Awkfloat xf; + + x = execute(a[0]); + xf = getfval(x); + k = (n == PREINCR || n == POSTINCR) ? 
1 : -1; + if (n == PREINCR || n == PREDECR) { + setfval(x, xf + k); + return(x); + } + z = gettemp(""); + setfval(z, xf); + setfval(x, xf + k); + tempfree(x, ""); + return(z); +} + +Cell *assign(Node **a, int n) +{ + register Cell *x, *y; + Awkfloat xf, yf; + double v; + + y = execute(a[1]); + x = execute(a[0]); /* order reversed from before... */ + if (n == ASSIGN) { /* ordinary assignment */ + if ((y->tval & (STR|NUM)) == (STR|NUM)) { + setsval(x, getsval(y)); + x->fval = getfval(y); + x->tval |= NUM; + } + else if (y->tval & STR) + setsval(x, getsval(y)); + else if (y->tval & NUM) + setfval(x, getfval(y)); + else + funnyvar(y, (char *)gettxt(readvofid, readvof)); + tempfree(y, ""); + return(x); + } + xf = getfval(x); + yf = getfval(y); + switch (n) { + case ADDEQ: + xf += yf; + break; + case SUBEQ: + xf -= yf; + break; + case MULTEQ: + xf *= yf; + break; + case DIVEQ: + if (yf == 0) + error(MM_ERROR, ":67:Division by zero in /="); + xf /= yf; + break; + case MODEQ: + if (yf == 0) + error(MM_ERROR, ":68:Division by zero in %%="); + modf(xf/yf, &v); + xf = xf - yf * v; + break; + case POWEQ: + if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ + xf = ipow(xf, (int) yf); + else + xf = errcheck(pow(xf, yf), (unsigned char *)"pow"); + break; + default: + error(MM_ERROR, ":69:Illegal assignment operator %d", n); + break; + } + tempfree(y, ""); + setfval(x, xf); + return(x); +} + +Cell *cat(Node **a, int q) +{ + register Cell *x, *y, *z; + register int n1, n2; + register unsigned char *s; + + x = execute(a[0]); + y = execute(a[1]); + getsval(x); + getsval(y); + n1 = (int)strlen((char*)x->sval); + n2 = (int)strlen((char*)y->sval); + s = (unsigned char *) malloc(n1 + n2 + 1); + if (s == NULL) + error(MM_ERROR, ":70:Out of space concatenating %.15s and %.15s", + x->sval, y->sval); + strcpy((char*)s, (char*)x->sval); + strcpy((char*)s+n1, (char*)y->sval); + tempfree(y, ""); + z = gettemp(""); + z->sval = s; + z->tval = STR; + tempfree(x, ""); + return(z); +} + 
+Cell *pastat(Node **a, int n) +{ + register Cell *x; + + if (a[0] == 0) + x = execute(a[1]); + else { + x = execute(a[0]); + if (istrue(x)) { + tempfree(x, ""); + x = execute(a[1]); + } + } + return x; +} + +Cell *dopa2(Node **a, int n) +{ + register Cell *x; + register int pair; + + pair = (intptr_t) a[3]; + if (pairstack[pair] == 0) { + x = execute(a[0]); + if (istrue(x)) + pairstack[pair] = 1; + tempfree(x, ""); + } + if (pairstack[pair] == 1) { + x = execute(a[1]); + if (istrue(x)) + pairstack[pair] = 0; + tempfree(x, ""); + x = execute(a[2]); + return(x); + } + return(false); +} + +Cell *split(Node **a, int nnn) +{ + Cell *x = 0, *y, *ap; + register unsigned char *s; + wchar_t sep, wc; + unsigned char *t, temp, num[25], *fs = 0; + int m, n, sepl; + + y = execute(a[0]); /* source string */ + s = getsval(y); + if (a[2] == 0) /* fs string */ + fs = *FS; + else if ((intptr_t) a[3] == STRING) { /* split(str,arr,"string") */ + x = execute(a[2]); + fs = getsval(x); + } else if ((intptr_t) a[3] == REGEXPR) + fs = (unsigned char*) "(regexpr)"; /* split(str,arr,/regexpr/) */ + else + error(MM_ERROR, ":71:Illegal type of split()"); + next(sep, fs, sepl); + ap = execute(a[1]); /* array name */ + freesymtab(ap); + dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, ap->nval, fs) ); + ap->tval &= ~STR; + ap->tval |= ARR; + ap->sval = (unsigned char *) makesymtab(NSYMTAB); + + n = 0; + if ((*s != '\0' && sep != '\0' && fs[sepl] != '\0') || + ((intptr_t) a[3] == REGEXPR)) { /* reg expr */ + fa *pfa; + if ((intptr_t) a[3] == REGEXPR) { /* it's ready already */ + pfa = (fa *) a[2]; + } else { + pfa = makedfa(fs, 1); + } + pfa->notbol = 0; + if (nematch(pfa,s)) { + pfa->notbol = REG_NOTBOL; + do { + n++; + snprintf((char *)num, sizeof num, "%d", n); + temp = *patbeg; + *patbeg = '\0'; + setsymtab(num, s, 0.0, STR|CANBENUM, (Array *)ap->sval); + *patbeg = temp; + s = patbeg + patlen; + if (*(patbeg+patlen-1) == 0 || *s == 0) { + n++; + snprintf((char *)num, sizeof num, "%d", n); + 
setsymtab(num, "", 0.0, STR, (Array *) ap->sval); + pfa->notbol = 0; + goto spdone; + } + } while (nematch(pfa,s)); + } + n++; + snprintf((char *)num, sizeof num, "%d", n); + setsymtab(num, s, 0.0, STR|CANBENUM, (Array *)ap->sval); + spdone: + pfa = NULL; + } else if (sep == ' ') { + for (n = 0; ; ) { + while (*s == ' ' || *s == '\t' || *s == '\n') + s++; + if (*s == 0) + break; + n++; + t = s; + next(wc, s, m); + do { + s += m; + next(wc, s, m); + } while (wc!=' ' && wc!='\t' && wc!='\n' && wc!='\0'); + temp = *s; + *s = '\0'; + snprintf((char *)num, sizeof num, "%d", n); + setsymtab(num, t, 0.0, STR|CANBENUM, (Array *)ap->sval); + *s = temp; + if (*s != 0) + s++; + } + } else if (*s != 0) { + for (;;) { + n++; + t = s; + while (next(wc, s, m), + wc != sep && wc != '\n' && wc != '\0') + s += m; + temp = *s; + *s = '\0'; + snprintf((char *)num, sizeof num, "%d", n); + setsymtab(num, t, 0.0, STR|CANBENUM, (Array *)ap->sval); + *s = temp; + if (wc == '\0') + break; + s += m; + } + } + tempfree(ap, ""); + tempfree(y, ""); + if (a[2] != 0 && (intptr_t) a[3] == STRING) { + tempfree(x, ""); + } + x = gettemp(""); + x->tval = NUM; + x->fval = n; + return(x); +} + +Cell *condexpr(Node **a, int n) +{ + register Cell *x; + + x = execute(a[0]); + if (istrue(x)) { + tempfree(x, ""); + x = execute(a[1]); + } else { + tempfree(x, ""); + x = execute(a[2]); + } + return(x); +} + +Cell *ifstat(Node **a, int n) +{ + register Cell *x; + + x = execute(a[0]); + if (istrue(x)) { + tempfree(x, ""); + x = execute(a[1]); + } else if (a[2] != 0) { + tempfree(x, ""); + x = execute(a[2]); + } + return(x); +} + +Cell *whilestat(Node **a, int n) +{ + register Cell *x; + + in_loop++; + for (;;) { + x = execute(a[0]); + if (!istrue(x)) { + in_loop--; + return(x); + } + tempfree(x, ""); + x = execute(a[1]); + if (isbreak(x)) { + x = true; + in_loop--; + return(x); + } + if (isnext(x) || isexit(x) || isret(x)) { + in_loop--; + return(x); + } + tempfree(x, ""); + } + /*in_loop--;*/ +} + +Cell 
*dostat(Node **a, int n) +{ + register Cell *x; + + in_loop++; + for (;;) { + x = execute(a[0]); + if (isbreak(x)) { + in_loop--; + return true; + } + if (isnext(x) || isexit(x) || isret(x)) { + in_loop--; + return(x); + } + tempfree(x, ""); + x = execute(a[1]); + if (!istrue(x)) { + in_loop--; + return(x); + } + tempfree(x, ""); + } + /*in_loop--;*/ +} + +Cell *forstat(Node **xa, int n) +{ + char **a = (char **)xa; + register Cell *x; + + in_loop++; + x = execute(a[0]); + tempfree(x, ""); + for (;;) { + if (a[1]!=0) { + x = execute(a[1]); + if (!istrue(x)) { + in_loop--; + return(x); + } + else tempfree(x, ""); + } + x = execute(a[3]); + if (isbreak(x)) { /* turn off break */ + in_loop--; + return true; + } + if (isnext(x) || isexit(x) || isret(x)) { + in_loop--; + return(x); + } + tempfree(x, ""); + x = execute(a[2]); + tempfree(x, ""); + } + /*in_loop--;*/ +} + +Cell *instat(Node **a, int n) +{ + register Cell *x, *vp, *arrayp, *cp, *ncp; + Array *tp; + int i; + + in_loop++; + vp = execute(a[0]); + arrayp = execute(a[1]); + if (!isarr(arrayp)) + error(MM_ERROR, notarray, arrayp->nval); + tp = (Array *) arrayp->sval; + tempfree(arrayp, ""); + for (i = 0; i < tp->size; i++) { /* this routine knows too much */ + for (cp = tp->tab[i]; cp != NULL; cp = ncp) { + setsval(vp, cp->nval); + ncp = cp->cnext; + x = execute(a[2]); + if (isbreak(x)) { + tempfree(vp, ""); + in_loop--; + return true; + } + if (isnext(x) || isexit(x) || isret(x)) { + tempfree(vp, ""); + in_loop--; + return(x); + } + tempfree(x, ""); + } + } + in_loop--; + return true; +} + +static int closefile(const char *a); + +Cell *bltin(Node **a, int n) +{ + static unsigned saved_srand = 1; + register Cell *x, *y; + Awkfloat u; + register int t; + unsigned char *p, *buf; + Node *nextarg; + + t = (intptr_t) a[0]; + x = execute(a[1]); + nextarg = a[1]->nnext; + switch (t) { + case FLENGTH: + u = (Awkfloat) chrlen(getsval(x)); break; + case FLOG: + u = errcheck(log(getfval(x)), (unsigned char *)"log"); break; 
+ case FINT: + modf(getfval(x), &u); break; + case FEXP: + u = errcheck(exp(getfval(x)), (unsigned char *)"exp"); break; + case FSQRT: + u = errcheck(sqrt(getfval(x)), (unsigned char *)"sqrt"); break; + case FSIN: + u = sin(getfval(x)); break; + case FCOS: + u = cos(getfval(x)); break; + case FATAN: + if (nextarg == 0) { + error(MM_WARNING, + ":72:atan2 requires two arguments; returning 1.0"); + u = 1.0; + } else { + y = execute(a[1]->nnext); + u = atan2(getfval(x), getfval(y)); + tempfree(y, ""); + nextarg = nextarg->nnext; + } + break; + case FSYSTEM: + fflush(stdout); /* in case something is buffered already */ + u = (Awkfloat) system((char *)getsval(x)) / 256; /* 256 is unix-dep */ + break; + case FRAND: + u = (Awkfloat) (rand() % 32767) / 32767.0; + break; + case FSRAND: + u = saved_srand; /* return previous seed */ + if (x->tval & REC) /* no argument provided */ + saved_srand = time(NULL); + else + saved_srand = getfval(x); + srand((int) saved_srand); + break; + case FTOUPPER: + case FTOLOWER: + p = getsval(x); + if ((buf = malloc(strlen((char *)p) + 1)) == 0) + error(MM_ERROR, outofspace, "case-conversion"); + strcpy((char*)buf, (char*)getsval(x)); + if (t == FTOUPPER) { + if (mb_cur_max == 1) { + for (p = buf; *p; p++) + if (islower(*p)) + *p = toupper(*p); + } else + caseconv(buf, towupper); + } else { + if (mb_cur_max == 1) { + for (p = buf; *p; p++) + if (isupper(*p)) + *p = tolower(*p); + } else + caseconv(buf, towlower); + } + tempfree(x, ""); + x = gettemp(""); + setsval(x, buf); + free(buf); + return x; + case FCLOSE: + u = (Awkfloat)closefile((char *)getsval(x)); + break; + default: /* can't happen */ + error(MM_ERROR, ":73:Illegal function type %d", t); + break; + } + tempfree(x, ""); + x = gettemp(""); + setfval(x, u); + if (nextarg != 0) { + error(MM_WARNING, ":74:Function has too many arguments"); + for ( ; nextarg; nextarg = nextarg->nnext) + execute(nextarg); + } + return(x); +} + +Cell *print(Node **a, int n) +{ + register Node *x; + register 
Cell *y; + FILE *fp; + + if (a[1] == 0) + fp = stdout; + else + fp = redirect((intptr_t)a[1], a[2]); + for (x = a[0]; x != NULL; x = x->nnext) { + y = execute(x); + /* + * ALMOST getsval(). POSIX.2 requires that + * numeric values be converted according to OFMT + * (not CONVFMT) for print. + */ + if (posix && (y->tval & (ARR|FLD|REC|STR)) == STR) + fputs((char *)y->sval, fp); + else if (!posix || (y->tval & (ARR|FLD|REC|NUM)) != NUM) + fputs((char *)r_getsval(y), fp); + else if ((long)y->fval == y->fval) + fprintf(fp, "%ld", (long)y->fval); + else + fprintf(fp, (char *)*OFMT, y->fval); + tempfree(y, ""); + if (x->nnext == NULL) + fputs((char *)*ORS, fp); + else + fputs((char *)*OFS, fp); + } + if (a[1] != 0) + fflush(fp); + return(true); +} + +/*ARGSUSED*/ +Cell *nullproc(Node **a, int n) { return 0; } + + +static struct afile +{ + FILE *fp; + unsigned char *fname; + int mode; /* '|', 'a', 'w' */ +} *files; +static int fopen_max; + +FILE *redirect(int a, Node *b) +{ + FILE *fp; + Cell *x; + unsigned char *fname; + + x = execute(b); + fname = getsval(x); + fp = openfile(a, fname); + if (fp == NULL) + error(MM_ERROR, badopen, fname, strerror(errno)); + tempfree(x, ""); + return fp; +} + +FILE *openfile(int a, unsigned char *s) +{ + register int i, m; + register FILE *fp = 0; + + if (*s == '\0') + error(MM_ERROR, ":75:Null file name in print or getline"); + for (i=0; i < fopen_max; i++) + if (files[i].fname && + strcmp((char*)s, (char*)files[i].fname) == 0) + if ((a == files[i].mode) || (a==APPEND && files[i].mode==GT)) + return files[i].fp; + for (i=0; i < fopen_max; i++) + if (files[i].fp == 0) + break; + if (i >= fopen_max) { + if ((files = realloc(files, sizeof *files * + (fopen_max = (i + 15))))==0) + error(MM_ERROR, ":76:%s makes too many open files", s); + memset(&files[i], 0, (fopen_max - i) * sizeof *files); + } + fflush(stdout); /* force a semblance of order */ + m = a; + if (a == GT) { + fp = fopen((char *)s, "w"); + } else if (a == APPEND) { + fp = 
fopen((char *)s, "a"); + m = GT; /* so can mix > and >> */ + } else if (a == '|') { /* output pipe */ + fp = popen((char *)s, "w"); + } else if (a == LE) { /* input pipe */ + fp = popen((char *)s, "r"); + } else if (a == LT) { /* getline <file */ + fp = strcmp((char *)s, "-") == 0 ? stdin : fopen((char *)s, "r"); /* "-" is stdin */ + } else /* can't happen */ + error(MM_ERROR, ":77:Illegal redirection"); + if (fp != NULL) { + files[i].fname = tostring(s); + files[i].fp = fp; + files[i].mode = m; + } + return fp; +} + +static int endfile(struct afile *afp) +{ + int ret; + + if (ferror(afp->fp)) { + clearerr(afp->fp); + error(MM_WARNING, ioerror, afp->fname); + errorflag = 1; + } + if (afp->mode == '|' || afp->mode == LE) + ret = pclose(afp->fp); + else + ret = fclose(afp->fp); + if (ret == EOF) { + error(MM_WARNING, ":79:I/O error occurred while closing %s", + afp->fname); + errorflag = 1; + } + if (afp->fp != stdout) { + xfree(afp->fname); + afp->fp = 0; + } + return ret; +} + +static int closefile(const char *a) +{ + int i, ret; + + ret = EOF; + for (i = 0; i < fopen_max; i++) + if (files[i].fname && strcmp(a, (char*)files[i].fname) == 0) + ret = endfile(&files[i]); + return(ret); +} + +static void closeall(void) +{ + struct afile std; + int i; + + for (i = 0; i < fopen_max; i++) + if (files[i].fp) + (void)endfile(&files[i]); + std.fp = stdout; + std.fname = (unsigned char *)"<stdout>"; + std.mode = GT; + (void)endfile(&std); +} + +Cell *sub(Node **a, int nnn) +{ + unsigned char *sptr, *pb, *q; + register Cell *x, *y, *result; + unsigned char *buf = NULL, *t; + int bufsize = 0; + fa *pfa; + + x = execute(a[3]); /* target string */ + t = getsval(x); + if (a[0] == 0) + pfa = (fa *) a[1]; /* regular expression */ + else { + y = execute(a[1]); + pfa = makedfa(getsval(y), 1); + tempfree(y, ""); + } + y = execute(a[2]); /* replacement string */ + result = false; + if (pmatch(pfa, t)) { + growbuf(&buf, &bufsize, CHUNK, NULL, "sub"); + pb = buf; + sptr = t; + while (sptr 
< patbeg) { + *pb++ = *sptr++; + if (pb >= &buf[bufsize]) + growbuf(&buf, &bufsize, CHUNK, &pb, "sub"); + } + sptr = getsval(y); + while (*sptr != 0) { + if (*sptr == '\\' && *(sptr+1) == '&') { + sptr++; /* skip \, */ + *pb++ = *sptr++; /* add & */ + } else if (*sptr == '&') { + sptr++; + for (q = patbeg; q < patbeg+patlen; ) { + *pb++ = *q++; + growbuf(&buf, &bufsize, CHUNK, + &pb, "sub"); + } + } else + *pb++ = *sptr++; + if (pb >= &buf[bufsize]) + growbuf(&buf, &bufsize, CHUNK, &pb, "sub"); + } + *pb = '\0'; + sptr = patbeg + patlen; + if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) + while ((*pb++ = *sptr++)) { + if (pb >= &buf[bufsize]) + growbuf(&buf, &bufsize, CHUNK, &pb, + "sub"); + } + setsval(x, buf); + result = true;; + free(buf); + } + tempfree(x, ""); + tempfree(y, ""); + return result; +} + +Cell *gsub(Node **a, int nnn) +{ + register Cell *x, *y; + unsigned char *rptr, *sptr, *t, *pb; + unsigned char *buf = NULL; + int bufsize = 0; + register fa *pfa; + int mflag, num; + + mflag = 0; /* if mflag == 0, can replace empty string */ + num = 0; + x = execute(a[3]); /* target string */ + t = getsval(x); + if (a[0] == 0) + pfa = (fa *) a[1]; /* regular expression */ + else { + y = execute(a[1]); + pfa = makedfa(getsval(y), 1); + tempfree(y, ""); + } + y = execute(a[2]); /* replacement string */ + pfa->notbol = 0; + if (pmatch(pfa, t)) { + pfa->notbol = REG_NOTBOL; + growbuf(&buf, &bufsize, CHUNK, NULL, "gsub"); + pb = buf; + rptr = getsval(y); + do { + /* + unsigned char *p; + int i; + printf("target string: %s, *patbeg = %o, patlen = %d\n", + t, *patbeg, patlen); + printf(" match found: "); + p=patbeg; + for (i=0; i<patlen; i++) + printf("%c", *p++); + printf("\n"); + */ + if (patlen == 0 && *patbeg != 0) { /* matched empty string */ + if (mflag == 0) { /* can replace empty */ + num++; + sptr = rptr; + while (*sptr != 0) { + if (*sptr == '\\' && *(sptr+1) == '&') { + sptr++; + *pb++ = *sptr++; + } else if (*sptr == '&') { + unsigned char *q; + 
sptr++; + for (q = patbeg; q < patbeg+patlen; ) { + *pb++ = *q++; + if (pb >= &buf[bufsize]) + growbuf(&buf, + &bufsize, CHUNK, + &pb, "gsub"); + } + } else + *pb++ = *sptr++; + if (pb >= &buf[bufsize]) + growbuf(&buf, + &bufsize, CHUNK, + &pb, "gsub"); + } + } + if (*t == 0) /* at end */ + goto done; + *pb++ = *t++; + mflag = 0; + } + else { /* matched nonempty string */ + num++; + sptr = t; + while (sptr < patbeg) { + if (pb >= &buf[bufsize]) + growbuf(&buf, &bufsize, CHUNK, + &pb, "gsub"); + *pb++ = *sptr++; + } + sptr = rptr; + while (*sptr != 0) { + if (*sptr == '\\' && *(sptr+1) == '&') { + sptr++; + *pb++ = *sptr++; + } else if (*sptr == '&') { + unsigned char *q; + sptr++; + for (q = patbeg; q < patbeg+patlen; ) { + *pb++ = *q++; + if (pb >= &buf[bufsize]) + growbuf(&buf, + &bufsize, CHUNK, + &pb, "gsub"); + } + } else + *pb++ = *sptr++; + if (pb >= &buf[bufsize]) + growbuf(&buf, &bufsize, CHUNK, + &pb, "gsub"); + } + t = patbeg + patlen; + if ((*(t-1) == 0) || (*t == 0)) + goto done; + mflag = 1; + } + } while (pmatch(pfa,t)); + sptr = t; + while ((*pb++ = *sptr++)) { + if (pb >= &buf[bufsize]) + growbuf(&buf, &bufsize, CHUNK, &pb, "gsub"); + } + done: *pb = '\0'; + setsval(x, buf); + pfa->notbol = 0; + free(buf); + } + tempfree(x, ""); + tempfree(y, ""); + x = gettemp(""); + x->tval = NUM; + x->fval = num; + return(x); +} +#include <stdarg.h> /* MR ul92-34309a2 */ +static int +growsprintf(unsigned char **whole, unsigned char **target, int *size, + const char *fmt, ...) 
+{ + va_list ap; + int ret; + size_t diff = 0, mx; + + if (*size == 0) { + if ((*whole = malloc(*size = CHUNK)) == NULL) + goto oflo; + *target = *whole; + } + diff = *target - *whole; +again: va_start(ap, fmt); + + mx = *size - diff - 8; + ret = vsnprintf((char *)*target, mx, fmt, ap); + va_end(ap); + + if (ret < 0 || ret >= mx) { + if (ret < 0) { + char dummy[2]; + va_start(ap, fmt); + ret = vsnprintf(dummy, sizeof dummy, fmt, ap); + va_end(ap); + if (ret < 0) + goto oflo; + } + if ((*whole = realloc(*whole, *size = ret + 1 + diff + 8)) == 0) + oflo: error(MM_ERROR, + ":103:Formatted result would be too long: %.20s ...", + fmt); + *target = &(*whole)[diff]; + goto again; + } + + while (**target) /* NUL characters might have been printed; */ + (*target)++; /* don't skip past them. */ + return ret; +} + +int chrlen(const unsigned char *s) +{ + wchar_t wc; + int m = 0, n; + + while (next(wc, s, n), wc != '\0') { + s += n; + m++; + } + return m; +} + +int chrdist(const unsigned char *s, const unsigned char *end) +{ + wchar_t wc; + int m = 0, n; + + while (next(wc, s, n), s <= end) { + s += n; + m++; + } + return m; +} + +static void caseconv(unsigned char *s, wint_t (*conv)(wint_t)) +{ + unsigned char *t = s; + wchar_t wc; + int len, nlen; + + while (*s) { + len = mbtowc(&wc, (char *)s, mb_cur_max); + if (len < 0) + *t++ = *s++; + else { + wc = conv(wc); + if ((nlen = wctomb((char *)t, wc)) <= len) { + t += nlen, s += len; + } else + *t++ = *s++; + } + } + *t = '\0'; +} + +static void growbuf(unsigned char **buf, int *bufsize, int incr, + unsigned char **ptr, const char *fn) +{ + unsigned char *op; + + op = *buf; + if ((*buf = realloc(*buf, *bufsize += incr)) == NULL) + error(MM_ERROR, outofspace, fn ? fn : ""); + if (ptr && *ptr) + *ptr = &(*buf)[*ptr - op]; +} diff --git a/nawk/tran.c b/nawk/tran.c @@ -0,0 +1,483 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. 
+ + Sccsid @(#)tran.c 1.16 (gritter) 2/4/05> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/tran.c /main/uw7_nj/1 */ +/* from RCS Header: tran.c 1.2 91/06/25 */ + + +#define DEBUG +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include "awk.h" +#include "y.tab.h" +#include <pfmt.h> + +#undef RS + +#define FULLTAB 2 /* rehash when table gets this x full */ +#define GROWTAB 4 /* grow table by this factor */ + +Array *symtab; /* main symbol table */ + +unsigned char **FS; /* initial field sep */ +unsigned char **RS; /* initial record sep */ +unsigned char **OFS; /* output field sep */ +unsigned char **ORS; /* output record sep */ +unsigned char **OFMT; /* output format for numbers */ +unsigned char **CONVFMT; /* generic format for numbers->strings */ +Awkfloat *NF; /* number of fields in current record */ +Awkfloat *NR; /* number of current record */ +Awkfloat *FNR; /* number of current record in current file */ +unsigned char **FILENAME; /* current filename argument */ +Awkfloat *ARGC; /* number of arguments from command line */ +unsigned char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */ +Awkfloat *RSTART; /* start of re matched with ~; 
origin 1 (!) */ +Awkfloat *RLENGTH; /* length of same */ + +Cell *recloc; /* location of record */ +Cell *nrloc; /* NR */ +Cell *nfloc; /* NF */ +Cell *fsloc; /* FS */ +Cell *fnrloc; /* FNR */ +Array *ARGVtab; /* symbol table containing ARGV[...] */ +Array *ENVtab; /* symbol table containing ENVIRON[...] */ +Cell *rstartloc; /* RSTART */ +Cell *rlengthloc; /* RLENGTH */ +Cell *symtabloc; /* SYMTAB */ + +Cell *nullloc; +Node *nullnode; /* zero&null, converted into a node for comparisons */ + +extern Cell **fldtab; +static int hash(register unsigned char *s, int n); +static void rehash(Array *tp); + +static const char + assigntovid[] = ":80", + assigntov[] = "assign to"; + +const char + readvofid[] = ":81", + readvof[] = "read value of", + outofspace[] = ":82:Out of space in %s", + nlstring[] = ":83:Newline in string %.10s ..."; + +void syminit(void) +{ + symtab = makesymtab(NSYMTAB); + setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab); + /* this is used for if(x)... tests: */ + nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab); + nullnode = valtonode(nullloc, CCON); + /* recloc = setsymtab("$0", record, 0.0, REC|STR|DONTFREE, symtab); */ + recloc = fldtab[0]; + fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab); + FS = &fsloc->sval; + RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval; + OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval; + ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval; + OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; + CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; + FILENAME = &setsymtab("FILENAME", "-", 0.0, STR|DONTFREE, symtab)->sval; + nfloc = setsymtab("NF", "", 0.0, NUM, symtab); + NF = &nfloc->fval; + nrloc = setsymtab("NR", "", 0.0, NUM, symtab); + NR = &nrloc->fval; + fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab); + FNR = &fnrloc->fval; + SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval; + 
rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab); + RSTART = &rstartloc->fval; + rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab); + RLENGTH = &rlengthloc->fval; + symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab); + symtabloc->sval = (unsigned char *) symtab; +} + +void arginit(int ac, unsigned char **av) +{ + Cell *cp; + int i; + unsigned char temp[25]; + + for (i = 1; i < ac; i++) /* first make FILENAME first real argument */ + if (!isclvar(av[i])) { + setsval(lookup("FILENAME", symtab), av[i]); + break; + } + ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval; + cp = setsymtab("ARGV", "", 0.0, ARR, symtab); + ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */ + cp->sval = (unsigned char *) ARGVtab; + for (i = 0; i < ac; i++) { + snprintf((char *)temp, sizeof temp, "%d", i); + setsymtab(temp, *av, 0.0, STR|CANBENUM, ARGVtab); + av++; + } +} + +void envinit(unsigned char **envp) +{ + Cell *cp; + unsigned char *p; + + cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab); + ENVtab = makesymtab(NSYMTAB); + cp->sval = (unsigned char *) ENVtab; + for ( ; *envp; envp++) { + if ((p = (unsigned char *) strchr((char *) *envp, '=')) == NULL) /* index() on bsd */ + continue; + *p++ = 0; /* split into two strings at = */ + setsymtab(*envp, p, 0.0, STR|CANBENUM, ENVtab); + p[-1] = '='; /* restore in case env is passed down to a shell */ + } +} + +Array *makesymtab(int n) +{ + Array *ap; + Cell **tp; + + ap = (Array *) malloc(sizeof(Array)); + tp = (Cell **) calloc(n, sizeof(Cell *)); + if (ap == NULL || tp == NULL) + error(MM_ERROR, outofspace, "makesymtab"); + ap->nelem = 0; + ap->size = n; + ap->tab = tp; + return(ap); +} + +void freesymtab(Cell *ap) /* free symbol table */ +{ + Cell *cp, *temp; + Array *tp; + int i; + + if (!isarr(ap)) + return; + tp = (Array *) ap->sval; + if (tp == NULL) + return; + for (i = 0; i < tp->size; i++) { + for (cp = tp->tab[i]; cp != NULL; cp = temp) { + xfree(cp->nval); + if (freeable(cp)) + 
xfree(cp->sval); + temp = cp->cnext; /* avoids freeing then using */ + free(cp); + } + } + free(tp->tab); + free(tp); +} + +void freeelem(Cell *ap, unsigned char *s) + /* free elem s from ap (i.e., ap["s"] */ +{ + Array *tp; + Cell *p, *prev = NULL; + int h; + + tp = (Array *) ap->sval; + h = hash(s, tp->size); + for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext) + if (strcmp((char *) s, (char *) p->nval) == 0) { + if (prev == NULL) /* 1st one */ + tp->tab[h] = p->cnext; + else /* middle somewhere */ + prev->cnext = p->cnext; + if (freeable(p)) + xfree(p->sval); + free(p->nval); + free(p); + tp->nelem--; + return; + } +} + +Cell *ssetsymtab(unsigned char *n, unsigned char *s, Awkfloat f, + unsigned t, Array *tp) +{ + register int h; + register Cell *p; + + if (n != NULL && (p = lookup(n, tp)) != NULL) { + dprintf( ("setsymtab found %lo: n=%s", (long)p, p->nval) ); + dprintf( (" s=\"%s\" f=%g t=%o\n", p->sval? p->sval : tostring(""), p->fval, p->tval) ); + return(p); + } + p = (Cell *) malloc(sizeof(Cell)); + if (p == NULL) + error(MM_ERROR, ":84:Symbol table overflow at %s", n); + p->nval = tostring(n); + p->sval = s ? tostring(s) : tostring(""); + p->fval = f; + p->tval = t & ~CANBENUM; + p->csub = 0; + if (t & CANBENUM) + (void)is2number(0, p); + tp->nelem++; + if (tp->nelem > FULLTAB * tp->size) + rehash(tp); + h = hash(n, tp->size); + p->cnext = tp->tab[h]; + tp->tab[h] = p; + dprintf( ("setsymtab set %lo: n=%s", (long)p, p->nval) ); + dprintf( (" s=\"%s\" f=%g t=%o\n", p->sval? 
p->sval : tostring(""), p->fval, p->tval) ); + return(p); +} + +static int hash(register unsigned char *s, int n) + /* form hash value for string s */ +{ + register unsigned hashval; + + for (hashval = 0; *s != '\0'; s++) + hashval = (*s + 31 * hashval); + return hashval % n; +} + +static void rehash(Array *tp) /* rehash items in small table into big one */ +{ + int i, nh, nsz; + Cell *cp, *op, **np; + + nsz = GROWTAB * tp->size; + np = (Cell **) calloc(nsz, sizeof(Cell *)); + if (np == NULL) + error(MM_ERROR, outofspace, "rehash"); + for (i = 0; i < tp->size; i++) { + for (cp = tp->tab[i]; cp; cp = op) { + op = cp->cnext; + nh = hash(cp->nval, nsz); + cp->cnext = np[nh]; + np[nh] = cp; + } + } + free(tp->tab); + tp->tab = np; + tp->size = nsz; +} + +Cell *slookup(register unsigned char *s, Array *tp) /* look for s in tp */ +{ + register Cell *p, *prev = NULL; + int h; + + h = hash(s, tp->size); + for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext) + if (strcmp((char *) s, (char *) p->nval) == 0) + return(p); /* found it */ + return(NULL); /* not found */ +} + +Awkfloat setfval(register Cell *vp, Awkfloat f) +{ + if ((vp->tval & (NUM | STR)) == 0) + funnyvar(vp, (char *)gettxt(assigntovid, assigntov)); + if (vp->tval & FLD) { + int n; + donerec = 0; /* mark $0 invalid */ + for (n = 0; vp != fldtab[n]; n++); + if (n > *NF) + newfld(n); + dprintf( ("setting field %d to %g\n", n, f) ); + } else if (vp->tval & REC) { + donefld = 0; /* mark $1... invalid */ + donerec = 1; + } + vp->tval &= ~STR; /* mark string invalid */ + vp->tval |= NUM; /* mark number ok */ + dprintf( ("setfval %lo: %s = %g, t=%o\n", (long)vp, vp->nval ? 
vp->nval : tostring(""), f, vp->tval) ); + return vp->fval = f; +} + +void funnyvar(Cell *vp, char *rw) +{ + if (vp->tval & ARR) + error(MM_ERROR, ":85:Cannot %s %s; it's an array name.", + rw, vp->nval); + if (vp->tval & FCN) + error(MM_ERROR, ":86:Cannot %s %s; it's a function.", + rw, vp->nval); + error(MM_ERROR, ":87:Funny variable %o: n=%s s=\"%s\" f=%g t=%o", + vp, vp->nval, vp->sval, vp->fval, vp->tval); +} + +unsigned char *setsval(register Cell *vp, unsigned char *s) +{ + if ((vp->tval & (NUM | STR)) == 0) + funnyvar(vp, (char *)gettxt(assigntovid, assigntov)); + if (vp->tval & FLD) { + int n; + donerec = 0; /* mark $0 invalid */ + for (n = 0; vp != fldtab[n]; n++); + if (n > *NF) + newfld(n); + dprintf( ("setting field %d to %s\n", n, s) ); + } else if (vp->tval & REC) { + donefld = 0; /* mark $1... invalid */ + donerec = 1; + } else if (vp == fsloc && donefld == 0) { + /* + * Because POSIX.2 requires that awk act as if it always + * splits the current input line immediately after reading, + * we force it to be split into fields just before a change + * to FS if we haven't needed to do so yet. + */ + fldbld(); + } + vp->tval &= ~NUM; + vp->tval |= STR; + s = tostring(s); /* moved to here since "s" can be "vp->sval" */ + if (freeable(vp)) + xfree(vp->sval); + if (vp->tval & REC) { + /* + * Make sure that recsize is large enough to build + * fields afterwards. 
+ */ + unsigned char *os = s; + + s = makerec(s, strlen((char *)s) + 1); + free(os); + } else + vp->tval &= ~DONTFREE; + dprintf( ("setsval %lo: %s = \"%s\", t=%o\n", (long)vp, vp->nval, s, vp->tval) ); + return(vp->sval = s); +} + +Awkfloat r_getfval(register Cell *vp) +{ + /* if (vp->tval & ARR) + ERROR "Illegal reference to array %s", vp->nval FATAL; + return 0.0; */ + if ((vp->tval & (NUM | STR)) == 0) + funnyvar(vp, (char *)gettxt(readvofid, readvof)); + if ((vp->tval & FLD) && donefld == 0) + fldbld(); + else if ((vp->tval & REC) && donerec == 0) + recbld(); + if (!isnum(vp)) { /* not marked as a number */ + vp->fval = awk_atof((char *)vp->sval); /* best guess */ + if (is2number(vp->sval, 0) && !(vp->tval&CON)) + vp->tval |= NUM; /* make NUM only sparingly */ + } + dprintf( ("getfval %lo: %s = %g, t=%o\n", (long)vp, vp->nval, vp->fval, vp->tval) ); + return(vp->fval); +} + +unsigned char *r_getsval(register Cell *vp) +{ + unsigned char s[100]; + + /* if (vp->tval & ARR) + ERROR "Illegal reference to array %s", + vp->nval FATAL; + return ""; */ + if ((vp->tval & (NUM | STR)) == 0) + funnyvar(vp, (char *)gettxt(readvofid, readvof)); + if ((vp->tval & FLD) && donefld == 0) + fldbld(); + else if ((vp->tval & REC) && donerec == 0) + recbld(); + if ((vp->tval & STR) == 0) { + if (!(vp->tval&DONTFREE)) + xfree(vp->sval); + if ((long)vp->fval == vp->fval) { + snprintf((char *)s, sizeof s, "%ld", (long)vp->fval); + vp->tval |= STR; + } else { + snprintf((char *)s, sizeof s, + (char *)(posix ? *CONVFMT : *OFMT), + vp->fval); + /* + * In case CONVFMT is changed by the program, + * we leave the string value uncached for non- + * integer numeric constants. Ugh. + */ + if (!(vp->tval & CON)) + vp->tval |= STR; + } + vp->sval = tostring(s); + vp->tval &= ~DONTFREE; + } + dprintf( ("getsval %lo: %s = \"%s\", t=%o\n", (long)vp, vp->nval ? vp->nval : tostring(""), vp->sval ? 
vp->sval : tostring(""), vp->tval) ); + return(vp->sval); +} + +unsigned char *stostring(register const unsigned char *s) +{ + register unsigned char *p; + + p = malloc(strlen((char *) s)+1); + if (p == NULL) + error(MM_ERROR, ":88:Out of space in tostring on %s", s); + strcpy((char *) p, (char *) s); + return(p); +} + +unsigned char *qstring(unsigned char *s, int delim) + /* collect string up to delim */ +{ + unsigned char *q; + int c, n; + + for (q = cbuf; (c = *s) != delim; s++) { + if (q >= cbuf + CBUFLEN - 1) + vyyerror(":89:String %.10s ... too long", cbuf); + else if (c == '\n') + vyyerror(nlstring, cbuf); + else if (c != '\\') + *q++ = c; + else /* \something */ + switch (c = *++s) { + case '\\': *q++ = '\\'; break; + case 'n': *q++ = '\n'; break; + case 't': *q++ = '\t'; break; + case 'b': *q++ = '\b'; break; + case 'f': *q++ = '\f'; break; + case 'r': *q++ = '\r'; break; + default: + if (!isdigit(c)) { + *q++ = c; + break; + } + n = c - '0'; + if (isdigit(s[1])) { + n = 8 * n + *++s - '0'; + if (isdigit(s[1])) + n = 8 * n + *++s - '0'; + } + *q++ = n; + break; + } + } + *q = '\0'; + return cbuf; +} diff --git a/nawk/version.c b/nawk/version.c @@ -0,0 +1,25 @@ +#include "awk.h" +#if defined (SU3) +const char version[] = "@(#)awk_su3.sl 1.51 (gritter) 12/25/06"; +int posix = 1; +#elif defined (SUS) +const char version[] = "@(#)awk_sus.sl 1.51 (gritter) 12/25/06"; +int posix = 1; +#else +const char version[] = "@(#)nawk.sl 1.51 (gritter) 12/25/06"; +int posix = 0; +#endif +/* SLIST */ +/* +awk.g.y: Sccsid @(#)awk.g.y 1.9 (gritter) 5/14/06> +awk.h: Sccsid @(#)awk.h 1.23 (gritter) 12/25/04> +awk.lx.l: Sccsid @(#)awk.lx.l 1.13 (gritter) 11/22/05> +b.c: Sccsid @(#)b.c 1.6 (gritter) 5/15/04> +lib.c: Sccsid @(#)lib.c 1.27 (gritter) 12/25/06> +main.c: Sccsid @(#)main.c 1.14 (gritter) 12/19/04> +maketab.c: Sccsid @(#)maketab.c 1.11 (gritter) 12/4/04> +parse.c: Sccsid @(#)parse.c 1.7 (gritter) 12/4/04> +run.c: Sccsid @(#)run.c 1.33 (gritter) 12/25/06> +tran.c: Sccsid 
@(#)tran.c 1.16 (gritter) 2/4/05> +rerule.sed:# Sccsid @(#)rerule.sed 1.1 (gritter) 2/6/05 +*/ diff --git a/od/mkfile b/od/mkfile @@ -0,0 +1,8 @@ +BIN = od +OBJ = od.o +LOCAL_CFLAGS = -DSUS +INSTALL_BIN = od +INSTALL_MAN1 = od.1 +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/od/od.1 b/od/od.1 @@ -0,0 +1,291 @@ +.\" +.\" Sccsid @(#)od.1 1.10 (gritter) 2/6/05 +.\" Parts taken from od(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH OD 1 "2/6/05" "Heirloom Toolchest" "User Commands" +.SH NAME +od \- octal dump +.SH SYNOPSIS +.PD 0 +.HP +.ad l +\fBod\fR [\fB\-bcCdDfFoOsSvxX\fR] [\fIfile\fR] +[[\fB+\fR]\fIoffset\fR[\fB.\fR][\fBb\fR]] +.HP +.ad l +\fBod\fR [\fB\-v\fR] [\fB\-A\ \fIaddress_base\fR] [\fB\-j\ \fIskip\fR] +[\fB\-N\ \fIcount\fR] [\fB\-t\ \fItype_string\fR] ... [\fIfile\fR ...] +.br +.PD +.ad b +.SH DESCRIPTION +.I Od +dumps +.I file +in +one or more formats +as +selected by the option arguments. +If no format specification is present, +.B \-o +is default. +The meanings of the option arguments are: +.TP +.B \-b +Interpret bytes in octal. +.TP +.B \-c +Interpret characters. +Certain non-graphic characters appear as C escapes: +null=\e0, +backspace=\eb, +formfeed=\ef, +newline=\en, +return=\er, +tab=\et; +others appear as 3-digit octal numbers. +For a multibyte character, +the graphical representation is printed for its first byte, +remaining bytes are marked +.BR ** . +.TP +.B \-d +Interpret unsigned two-byte words in decimal. +.TP +.B \-D +Interpret unsigned four-byte words in decimal. +.TP +.B \-f +Interpret floating point format as single precision. +.TP +.B \-F +Interpret floating point format as double precision. +.TP +.B \-o +Interpret two-byte words in octal. +.TP +.B \-O +Interpret four-byte words in octal. +.TP +.B \-s +Interpret signed two-byte words in decimal. +.TP +.B \-S +Interpret signed four-byte words in decimal. 
+.TP +.B \-v +Print identical groups of output lines +that immediately follow each other +instead of abbreviating +all but the first one by printing a single +.B * +character. +.TP +.B \-x +Interpret two-byte words in hexadecimal. +.TP +.B \-X +Interpret four-byte words in hexadecimal. +.PP +The following option is supported as an extension: +.TP +.B \-C +Prints the input data interpreted as characters +as another column right next to the regular output. +Non-printable characters are replaced by periods. +.PP +The +.I file +argument specifies which file is to be dumped. +If no file argument is specified, +the standard input is used. +If more than one file argument is specified, +the concatenation of all files +without an intervening separator +is dumped. +.PP +The offset argument specifies the offset +in the file where dumping is to commence. +This argument is normally interpreted +as octal bytes. +If `\fB.\fR' is appended, the offset is interpreted in +decimal. +If `\fBb\fR' is appended, the offset is interpreted in +blocks of 512 bytes. +If the file argument is omitted, +the offset argument must be preceded by +.RB ` + ' +with +.BR /usr/5bin/posix/od . +.PP +The following options have been introduced by POSIX.2. +If any of these options is present, +an offset-like argument +is always interpreted as the name of a file to be dumped. +.TP +\fB\-A \fIaddress_base\fR +Sets the format of the file offset printed in the first column. +Valid values for \fIaddress_base\fR are: +.RS +.TP 3 +.B d +Print offset as decimal. +.TP 3 +.B n +Print no offset column. +.TP 3 +.B o +Print offset as octal (default). +.TP 3 +.B x +Print offset as hexadecimal. +.RE +.TP +\fB\-j \fIskip\fR +Skip +.I skip +bytes of input, +where +.I skip +may be either a decimal number, +an octal number preceded by +.BR 0 , +or a hexadecimal number preceded by +.B 0x +or +.BR 0X . 
+If the last character of +.I skip +is +.B b +(if not a hexadecimal number), +.BR k , +or +.BR m , +the value is multiplied by +512, 1024, or 1048576, respectively. +.TP +\fB\-N \fIcount\fR +Terminate processing after +.I count +bytes of input, +where +.I count +may be either a decimal number, +an octal number preceded by +.BR 0 , +or a hexadecimal number preceded by +.B 0x +or +.BR 0X . +.TP +\fB\-t \fItype_string\fR +Set output format. +\fItype_string\fR may consist of one or more specifications as follows: +.RS +.TP +\fBa\fR +Print characters, +ignoring the most significant bit. +ASCII control characters are printed as their names, +other nonprintable characters are printed as octal bytes. +.TP +\fBc\fR +Print characters. +Non-printable characters are either printed as escape sequences +`\e0', `\ea', `\eb', `\ef', `\en', `\er', `\et', `\ev' +or as octal bytes. +For a multibyte character, +the graphical representation is printed for its first byte, +remaining bytes are marked +.BR ** . +.TP +\fBd\fR[\fB1\fR|\fB2\fR|\fB4\fR|\fB8\fR|\fBC\fR|\fBS\fR|\fBI\fR|\fBL\fR] +Print signed decimal words, +with the word length indicated by the second character +(\fB1\fR one byte per word, +\fB2\fR two bytes per word, +\fB4\fR four bytes per word, +\fB8\fR eight bytes per word, +\fBC\fR the size of the C language `char' data type, +\fBS\fR the size of the C language `short' data type, +\fBI\fR the size of the C language `int' data type, +and \fBL\fR the size of the C language `long' data type). +The default is the machine word length. +.TP +\fBf\fR[\fB4\fR|\fB8\fR|\fBF\fR|\fBD\fR|\fBL\fR] +Print floating point format, +with the precision indicated by the second character +(\fB4\fR and \fBF\fR single precision, +\fB8\fR, \fBD\fR, and \fBL\fR double precision). +The default is double precision. +.TP +\fBo\fR[\fB1\fR|\fB2\fR|\fB4\fR|\fB8\fR|\fBC\fR|\fBS\fR|\fBI\fR|\fBL\fR] +Print octal words, +with the word length indicated by the second character +as described for +.B d +above. 
+.TP +\fBu\fR[\fB1\fR|\fB2\fR|\fB4\fR|\fB8\fR|\fBC\fR|\fBS\fR|\fBI\fR|\fBL\fR] +Print unsigned decimal words, +with the word length indicated by the second character +as described for +.B d +above. +.TP +\fBx\fR[\fB1\fR|\fB2\fR|\fB4\fR|\fB8\fR|\fBC\fR|\fBS\fR|\fBI\fR|\fBL\fR] +Print hexadecimal words, +with the word length indicated by the second character +as described for +.B d +above. +.RE +.PP +Unless the +.B \-N +option is given, +dumping continues until end-of-file. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +for the +.B \-c +and +.B \-tc +options. diff --git a/od/od.c b/od/od.c @@ -0,0 +1,1078 @@ +/* + * od - octal dump + * + * Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +#if defined (SUS) +static const char sccsid[] USED = "@(#)od_sus.sl 1.26 (gritter) 5/29/05"; +#else +static const char sccsid[] USED = "@(#)od.sl 1.26 (gritter) 5/29/05"; +#endif + +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <libgen.h> +#include <inttypes.h> +#include <locale.h> +#include <ctype.h> +#include <wctype.h> +#include <wchar.h> +#include <limits.h> +#include "asciitype.h" +#include "atoll.h" + +#ifdef __GLIBC__ +#ifdef _IO_getc_unlocked +#undef getc +#define getc(f) _IO_getc_unlocked(f) +#endif /* _IO_getc_unlocked */ +#ifdef _IO_putc_unlocked +#undef putc +#define putc(c, f) _IO_putc_unlocked(c, f) +#endif /* _IO_putc_unlocked */ +#endif /* __GLIBC__ */ + +enum { + BLOCK = 16 +}; + +/* + * An input block. + */ +union block { + char b_c[BLOCK]; + int16_t b_16[8]; + int32_t b_32[4]; + int64_t b_64[2]; + float b_f[4]; + double b_d[2]; +}; + +/* + * Format type as given with the -t option. + */ +struct type { + struct type *t_nxt; /* next type */ + const char *t_prf; /* format string */ + int t_rst; /* rest of multibyte character */ + char t_cnt; /* word size */ + char t_fmt; /* format character */ + char t_pad; /* space padding length */ + char t_000; /* currently unused */ +}; + +/* + * An input buffer. + */ +struct buffer { + union block bu_blk; /* input data */ + int bu_cnt; /* valid bytes in input data */ +}; + +/* + * Maps -t format to printf strings. 
+ */ +static const struct { + char p_cnt; + char p_fmt; + char p_pad; + char p_000; + const char *p_prf; +} prf[] = { + { 4, 'f', 1, 0, " %14.7e" }, + { 8, 'f', 10, 0, " %21.14le" }, + { 1, 'd', 0, 0, " %3d", }, + { 2, 'd', 0, 0, "\205\212", }, + { 4, 'd', 4, 0, "\12\212", }, + { 8, 'd', 10, 0, "\24\212", }, + { 1, 'o', 0, 0, "\3\10" }, + { 2, 'o', 1, 0, "\6\10" }, + { 4, 'o', 4, 0, "\13\10" }, + { 8, 'o', 9, 0, "\26\10" }, + { 1, 'u', 0, 0, "\3\12" }, + { 2, 'u', 2, 0, "\5\12" }, + { 4, 'u', 5, 0, "\12\12" }, + { 8, 'u', 11, 0, "\24\12" }, + { 1, 'x', 1, 0, "\2\20" }, + { 2, 'x', 3, 0, "\4\20" }, + { 4, 'x', 7, 0, "\10\20" }, + { 8, 'x', 15, 0, "\20\20" }, + { 1, 'a', 0, 0, "" }, + { 1, 'c', 0, 0, "" }, + { 1, '\0', 0, 0, "" }, + { 0, 0, 0, 0, NULL } +}; + +static unsigned errcnt; /* count of errors */ +static char *progname; /* argv[0] to main() */ +static int offset_base = 8;/* base of offset to be printed */ +static int offset_oflo = 07777777; /* max offs. in regular width */ +static long long skip; /* skip bytes of input */ +static long long limit = -1; /* print no more bytes than limit */ +static long long total; /* total bytes of input */ +static long long offset; /* offset to print */ +static int vflag; /* print all lines */ +static int Cflag; /* Cray -C option */ +static char **files; /* files to read */ +static const char *skipstr; /* skip format string for error msg */ +static FILE *curfile; /* current file */ +static struct type *types; /* output formats */ +static int mb_cur_max; /* MB_CUR_MAX */ +static int hadinput; /* did actually read from a file */ +static int stretch; /* stretch output columns */ +static int expensive; /* need to compare output lines */ + +/* + * For -t a. 
/*
 * Character name tables for the one-byte text formats.  format() writes
 * one blank followed by a table entry and then advances its output index
 * by four, so every entry has to be exactly three characters wide; a
 * shorter entry would leave a NUL inside the output row and cut it short.
 * Each table is indexed by byte value and ends with the entry for 040.
 */

/*
 * For -t a.
 */
static const char *const ctab_a[] = {
	"nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
	" bs", " ht", " nl", " vt", " ff", " cr", " so", " si",
	"dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
	"can", " em", "sub", "esc", " fs", " gs", " rs", " us",
	" sp"
};

/*
 * For -c.
 */
static const char *const ctab_0[] = {
	" \\0", "001", "002", "003", "004", "005", "006", "007",
	" \\b", " \\t", " \\n", "013", " \\f", " \\r", "016", "017",
	"020", "021", "022", "023", "024", "025", "026", "027",
	"030", "031", "032", "033", "034", "035", "036", "037",
	"   "
};

/*
 * For -t c.  Same as the -c table except that 007 and 013 are shown as
 * the C escapes \a and \v.
 */
static const char *const ctab_c[] = {
	" \\0", "001", "002", "003", "004", "005", "006", " \\a",
	" \\b", " \\t", " \\n", " \\v", " \\f", " \\r", "016", "017",
	"020", "021", "022", "023", "024", "025", "026", "027",
	"030", "031", "032", "033", "034", "035", "036", "037",
	"   "
};

/******************************* HELPERS ********************************/
/*
 * calloc() wrapper that does not return on failure: it writes
 * "No storage" to file descriptor 2 and exits with status 077.
 */
static void *
scalloc(size_t nmemb, size_t size)
{
	void	*p = calloc(nmemb, size);

	if (p == NULL) {
		write(2, "No storage\n", 11);
		exit(077);
	}
	return p;
}

/*
 * realloc() wrapper with the same failure policy as scalloc().  Passing
 * NULL for vp allocates a fresh object.
 */
static void *
srealloc(void *vp, size_t nbytes)
{
	void	*p = realloc(vp, nbytes);

	if (p == NULL) {
		write(2, "No storage\n", 11);
		exit(077);
	}
	return p;
}
+ */ +static FILE * +nextfile(void) +{ + FILE *fp; + + if (curfile && curfile != stdin) + fclose(curfile); + do { + if (files == NULL || files[0] == NULL) + return NULL; + if (files[0][0] == '-' && files[0][1] == '\0') { + fp = stdin; + if (limit >= 0) + setvbuf(stdin, NULL, _IONBF, 0); + } else { + if ((fp = fopen(files[0], "r")) == NULL) { + fprintf(stderr, "%s: cannot open %s\n", + progname, files[0]); + errcnt |= 1; + } + } + files++; + } while (fp == NULL); + if (hadinput == 0 && fp != NULL) + hadinput++; + return fp; +} + +/* + * Skip bytes of input. + */ +static void +doskip(void) +{ + while (skip > 0) { + if (curfile == NULL || getc(curfile) == EOF) { + if ((curfile = nextfile()) == NULL) { + fprintf(stderr, "%s: %s is too large.\n", + progname, skipstr); + exit(2); + } + continue; + } + total++; + skip--; + } + if (limit >= 0) + limit += total; +} + +/* + * Fill an input buffer. + */ +static int +fill(struct buffer *bp) +{ + int c, i; + + i = 0; + while (i < sizeof bp->bu_blk && (limit <= 0 || total < limit)) { + if (curfile == NULL || (c = getc(curfile)) == EOF) { + if ((curfile = nextfile()) == NULL) + break; + continue; + } + bp->bu_blk.b_c[i++] = (char)c; + total++; + } + bp->bu_cnt = i; + while (i < sizeof bp->bu_blk) + bp->bu_blk.b_c[i++] = '\0'; + return bp->bu_cnt; +} + +/* + * Print a value to the passed buffer. As 64-bit arithmethics requires + * more than twice the time of 32-bit arithmetics on 32-bit platforms, + * generate different function sets for int, long, and long long. + */ +#define digit(T, type) static size_t \ +T ## digit(char *buf, int size, int base, unsigned type n) \ +{ \ + char *cp; \ + int d; \ +\ + if (size == 0) \ + return 0; \ + cp = buf + T ## digit(buf, size - 1, base, n / base); \ + *cp = (d = n % base) > 9 ? 
d - 10 + 'a' : d + '0'; \ + return cp - buf + 1; \ +} + +#define number(T, type) static size_t \ +T ## number(char *buf, const char *fmt, unsigned type n) \ +{ \ + int size = fmt[0] & 0377, base = fmt[1] & 0377; \ + int add = 1; \ +\ + buf[0] = ' '; \ + if (size & 0200) { \ + size &= 0177; \ + buf[add++] = ' '; \ + } \ + if (base & 0200) { \ + base &= 0177; \ + if ((type)n < 0) { \ + buf[add] = '-'; \ + n = 0 - (type)n; \ + } else \ + buf[add] = ' '; \ + add++; \ + } \ + return T ## digit(&buf[add], size, base, n) + add; \ +} + +#define mkfuncs(T, type) digit(T, type) number(T, type) + +mkfuncs(i, int) +mkfuncs(l, long) +mkfuncs(ll, long long) + +/* + * Print the offset at the start of each row. + */ +static void +prna(long long addr, int c) +{ + unsigned long long a; + char buf[30]; + int m, n, s; + + if (offset_base != 0) { + if (addr <= offset_oflo) { + /* + * Address fits in 7 characters and is preceded + * by '0' characters. + */ + if (addr > UINT_MAX) + n = lldigit(buf, 7, offset_base, addr); + else + n = idigit(buf, 7, offset_base, addr); + for (m = 0; m < n; m++) + putc(buf[m], stdout); + } else { + /* + * Precompute the length of the address in + * characters if possible (speed improvement). + */ + switch (offset_base) { + case 8: + a = addr; + for (s = 0; a != 0; s++) + a >>= 3; + break; + case 16: + a = addr; + for (s = 0; a != 0; s++) + a >>= 4; + break; + default: + s = sizeof buf; + } + if (addr > UINT_MAX) + n = lldigit(buf, s, offset_base, addr); + else + n = idigit(buf, s, offset_base, addr); + for (m = 0; buf[m] == '0'; m++); + while (m < n) { + putc(buf[m], stdout); + m++; + } + } + } + if (c != '\0') + putc(c, stdout); +} + +/* + * Print a number of output lines, each preceded by the offset column. 
+ */ +static void +prnt(long long addr, const char *s) +{ + int lc = 0; + + do { + if (lc++ == 0) + prna(addr, '\0'); + else + fputs(" ", stdout); + do + putc(*s, stdout); + while (*s++ != '\n'); + } while (*s != '\0'); +} + +/* + * Append a string to a group of output lines, or flush if s == NULL. + */ +static void +put(const char *s) +{ + static char *ob, *Ob; + static size_t os, Os, ol; + static int eq; + + if (s == NULL) { + if (Ob && !vflag && expensive && strcmp(ob, Ob) == 0) { + if (eq++ == 0) + printf("*\n"); + } else { + prnt(offset, ob); + if (ol + 1 > Os) + Ob = srealloc(Ob, Os = ol + 1); + strcpy(Ob, ob); + eq = 0; + } + ol = 0; + } else { + size_t l = strlen(s); + + if (ol + l + 1 >= os) + ob = srealloc(ob, os = ol + l + 1); + strcpy(&ob[ol], s); + ol += l; + } +} + +/* + * Format the data within the buffers according to tp. + */ +static void +format(struct type *tp, struct buffer *b1, struct buffer *b2) +{ + char buf[200]; + int i, j, n, l = 0; + + switch (tp->t_fmt) { + case 'a': + case '\0': + case 'c': + for (i = 0; i < b1->bu_cnt; i++) { + int c = b1->bu_blk.b_c[i] & 0377; + + if (tp->t_fmt == 'a') + c &= 0177; + if (tp->t_rst) { + strcpy(&buf[l], " **"); + tp->t_rst--; + l += 4; + } else if (tp->t_fmt != 'a' && c > 040 && + mb_cur_max > 1) { + char mb[MB_LEN_MAX]; + struct buffer *bp; + int m, n; + wchar_t wc; + + m = i; + bp = b1; + for (n = 0; n < mb_cur_max; n++) { + mb[n] = bp->bu_blk.b_c[m++]; + if (m >= bp->bu_cnt) { + if (bp == b1) { + bp = b2; + m = 0; + } else + break; + } + } + mb[n] = '\0'; + if ((n = mbtowc(&wc, mb, mb_cur_max)) <= 0 + || !iswprint(wc)) + goto spec; + m = wcwidth(wc); + do + buf[l++] = ' '; + while (++m < 4); + for (m = 0; m < n; m++) + buf[l++] = mb[m]; + if (n > 1) + tp->t_rst = n - 1; + } else if (c > 040 && isprint(c)) { + buf[l++] = ' '; + buf[l++] = ' '; + buf[l++] = ' '; + buf[l++] = c; + } else { + spec: if (c <= 040) { + buf[l] = ' '; + switch (tp->t_fmt) { + case 'a': + strcpy(&buf[l+1], ctab_a[c]); + break; 
+ case '\0': + strcpy(&buf[l+1], ctab_0[c]); + break; + case 'c': + strcpy(&buf[l+1], ctab_c[c]); + break; + } + l += 4; + } else if (tp->t_fmt == 'a' && c == '\177') { + strcpy(&buf[l], " del"); + l += 4; + } else + l += inumber(&buf[l], "\3\10", c); + } + } + break; + case 'f': + case 'd': + case 'o': + case 'u': + case 'x': + for (i = 0, n = 0; + i < BLOCK / tp->t_cnt && n < b1->bu_cnt; + i++, n += tp->t_cnt) { + if (stretch) { + for (j = 0; j < tp->t_pad + stretch - 1; j++) + buf[l++] = ' '; + } + if (tp->t_fmt == 'f') { + switch (tp->t_cnt) { + case 4: + l += sprintf(&buf[l], tp->t_prf, + b1->bu_blk.b_f[i]); + break; + case 8: + l += sprintf(&buf[l], tp->t_prf, + b1->bu_blk.b_d[i]); + break; + } + } else { + switch (tp->t_cnt) { + case 1: + if (tp->t_fmt == 'd') + l += sprintf(&buf[l], tp->t_prf, + b1->bu_blk.b_c[i]); + else + l += inumber(&buf[l], tp->t_prf, + b1->bu_blk.b_c[i]&0377); + break; + case 2: + if (tp->t_fmt == 'd') + l += inumber(&buf[l], tp->t_prf, + b1->bu_blk.b_16[i]); + else + l += inumber(&buf[l], tp->t_prf, + b1->bu_blk.b_16[i] & 0177777U); + break; + case 4: + if (tp->t_fmt == 'd') + l += lnumber(&buf[l], tp->t_prf, + b1->bu_blk.b_32[i]); + else + l += lnumber(&buf[l], tp->t_prf, + b1->bu_blk.b_32[i] & + 037777777777UL); + break; + case 8: + if (tp->t_fmt == 'd') + l+= llnumber(&buf[l], tp->t_prf, + b1->bu_blk.b_64[i]); + else + l+= llnumber(&buf[l], tp->t_prf, + b1->bu_blk.b_64[i] & + 01777777777777777777777ULL); + break; + } + } + } + } + if (Cflag && b1->bu_cnt > 0) { + static int max; + int c; + if (max == 0) + max = l * (BLOCK/tp->t_cnt) / + ((b1->bu_cnt+tp->t_cnt-1) / tp->t_cnt); + while (l < max) + buf[l++] = ' '; + buf[l++] = ' '; + for (i = 0; i < b1->bu_cnt || i % 8; i++) { + c = i < b1->bu_cnt ? b1->bu_blk.b_c[i] & 0377 : '.'; + buf[l++] = isprint(c) ? c : '.'; + } + } + buf[l++] = '\n'; + buf[l] = '\0'; + put(buf); +} + +/* + * Main execution loop. 
Two input buffers are necessary because multibyte + * characters for the -c option do not always end at a buffer boundary. + */ +static void +od(void) +{ + struct buffer b1, b2, *bp, *bq; + struct type *tp; + int star = 0; + + offset = total; + fill(bp = &b1); + fill(bq = &b2); + if (hadinput == 0) + return; + do { + if (star == 0) { + for (tp = types; tp; tp = tp->t_nxt) + format(tp, bp, bq); + put(NULL); + } + offset += bp->bu_cnt; + bp = (bp == &b1 ? &b2 : &b1); + bq = (bq == &b1 ? &b2 : &b1); + /* + * If no multibyte characters are to be printed, identical + * input blocks always lead to identical output lines. It + * is thus not necessary to format them for comparison; + * comparing at this point saves a lot of time for files + * that contain many identical lines. + */ + if (!vflag && !expensive && bp->bu_cnt && + bp->bu_cnt == bq->bu_cnt && + memcmp(bp->bu_blk.b_c, bq->bu_blk.b_c, + bp->bu_cnt) == 0) { + if (star == 0) + printf("*\n"); + star = 1; + } else + star = 0; + } while (fill(bq) > 0 || bp->bu_cnt > 0); + if (total > 0) + prna(total, '\n'); +} + +/*************************** OPTION SCANNING *****************************/ +static void +usage(void) +{ + fprintf(stderr, "usage: %s [-bcdDfFoOsSvxX] [file] [[+]offset[.][b]]\n", + progname); + exit(2); +} + +static void +setfiles(char **av) +{ + if (*av) + files = av; + else { + curfile = stdin; + hadinput = 1; + if (limit >= 0) + setvbuf(stdin, NULL, _IONBF, 0); + } +} + +static void +invarg(int c) +{ + fprintf(stderr, "%s: invalid argument to option -%c\n", progname, c); + usage(); +} + +/* + * Compute output column alignment. + */ +static void +align(void) +{ + struct type *tp, *tq; + + for (tp = types; tp && tp->t_nxt; tp = tp->t_nxt) { + tq = tp->t_nxt; + + if (tp->t_pad != tq->t_pad) { + stretch = 1; + break; + } + } +} + +/* + * Add an element to the list of types. 
+ */ +static void +addtype(char fmt, char cnt) +{ + struct type *tp, *tq; + int i; + + tp = scalloc(1, sizeof *tp); + tp->t_fmt = fmt; + tp->t_cnt = cnt; + for (i = 0; prf[i].p_prf; i++) { + if (prf[i].p_cnt == cnt && prf[i].p_fmt == fmt) { + tp->t_prf = prf[i].p_prf; + tp->t_pad = prf[i].p_pad; + tp->t_000 = prf[i].p_000; + break; + } + } + if (types) { + for (tq = types; tq->t_nxt; tq = tq->t_nxt); + tq->t_nxt = tp; + } else + types = tp; +} + +/* + * Handle the argument to -t. + */ +static int +settype(const char *s) +{ + char fmt, cnt; + + if (s == NULL) { + expensive = mb_cur_max > 1; + addtype('\0', 1); + return 0; + } + while (*s) { + switch (fmt = *s++) { + case 'c': + expensive = mb_cur_max > 1; + /*FALLTHRU*/ + case 'a': + addtype(fmt, 1); + break; + case 'f': + switch (*s) { + case 'F': + case '4': + cnt = 4; + s++; + break; + case 'D': + case 'L': + case '8': + cnt = 8; + s++; + break; + default: + cnt = 8; + } + addtype(fmt, cnt); + break; + case 'd': + case 'o': + case 'u': + case 'x': + switch (*s) { + case '1': + cnt = 1; + s++; + break; + case 'C': + cnt = sizeof (char); + s++; + break; + case '2': + cnt = 2; + s++; + break; + case 'S': + cnt = sizeof (short); + s++; + break; + case '4': + cnt = 4; + s++; + break; + case 'I': + cnt = sizeof (int); + s++; + break; + case '8': + cnt = 8; + s++; + break; + case 'L': + cnt = sizeof (long); + s++; + break; + default: + cnt = sizeof (int); + } + addtype(fmt, cnt); + break; + default: + return -1; + } + } + return 0; +} + +/* + * Handle a traditional offset argument. 
+ */ +static int +setoffset(const char *s) +{ + long long o; + const char *sp; + int base = 8; + int mult = 1; + + skipstr = s; + if (*s == '+') + s++; + for (sp = s; digitchar(*sp & 0377); sp++); + if (sp > s) { + if (*sp == '.') { + base = 10; + sp++; + } + if (*sp == 'b' || *sp == 'B') { + mult = 512; + sp++; + } + if (*sp != '\0') + return -1; + } else + return -1; + o = strtoll(s, NULL, base); + skip = o * mult; + return 0; +} + +/* + * Handle the argument to -j. + */ +static int +setskip(const char *s) +{ + const char *sp = NULL; + long long o; + int base = 10; + int mult = 1; + + skipstr = s; + if (s[0] == '0' && s[1]) { + s++; + if (*s == 'x' || *s == 'X') { + s++; + base = 16; + } else + base = 8; + } + switch (base) { + case 8: + for (sp = s; octalchar(*sp & 0377); sp++); + break; + case 10: + for (sp = s; digitchar(*sp & 0377); sp++); + break; + case 16: + for (sp = s; digitchar(*sp & 0377) || + *sp == 'a' || *sp == 'A' || + *sp == 'b' || *sp == 'B' || + *sp == 'c' || *sp == 'C' || + *sp == 'd' || *sp == 'D' || + *sp == 'e' || *sp == 'E' || + *sp == 'f' || *sp == 'F'; + sp++); + break; + } + if (sp > s) { + switch (*sp) { + case 'b': + mult = 512; + sp++; + break; + case 'k': + mult = 1024; + sp++; + break; + case 'm': + mult = 1048576; + sp++; + break; + case '\0': + break; + default: + return -1; + } + if (*sp != '\0') + return -1; + } else + return -1; + o = strtoull(s, NULL, base); + skip = o * mult; + return 0; +} + +/* + * Handle the argument to -N. 
+ */ +static int +setlimit(const char *s) +{ + long long o; + char *x; + int base = 10; + + if (*s == '0') { + s++; + if (*s == 'x' || *s == 'X') { + s++; + base = 16; + } else + base = 8; + } + o = strtoll(s, &x, base); + if (*x != '\0') + return -1; + limit = o; + return 0; +} + +int +main(int argc, char **argv) +{ + const char optstring[] = ":A:bcCdDfFj:N:oOsSt:vxX"; + int i, newopt = 0;; + + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + if (sizeof (union block) != BLOCK || mb_cur_max > BLOCK) + abort(); + progname = basename(argv[0]); +#ifdef __GLIBC__ + putenv("POSIXLY_CORRECT=1"); +#endif + while ((i = getopt(argc, argv, optstring)) != EOF) { + switch (i) { + case 'A': + switch (optarg[0]) { + case 'd': + offset_base = 10; + offset_oflo = 9999999; + break; + case 'o': + offset_base = 8; + offset_oflo = 07777777; + break; + case 'x': + offset_base = 16; + offset_oflo = 0xfffffff; + break; + case 'n': + offset_base = 0; + break; + default: + invarg(i); + } + if (optarg[1] != '\0') + invarg(i); + newopt = 1; + break; + case 'b': + settype("o1"); + break; + case 'c': + settype(NULL); + break; + case 'd': + settype("u2"); + break; + case 'D': + settype("u4"); + break; + case 'f': + settype("fF"); + break; + case 'F': + settype("fD"); + break; + case 'j': + if (setskip(optarg) < 0) + invarg(i); + newopt = 1; + break; + case 'N': + if (setlimit(optarg) < 0) + invarg(i); + newopt = 1; + break; + case 'o': + settype("o2"); + break; + case 'O': + settype("o4"); + break; + case 's': + settype("d2"); + break; + case 'S': + settype("d4"); + break; + case 't': + if (settype(optarg) < 0) + invarg('t'); + newopt = 1; + break; + case 'v': + vflag = 1; + break; + case 'x': + settype("x2"); + break; + case 'X': + settype("x4"); + break; + case ':': + fprintf(stderr, + "%s: option requires an argument -- %c\n", + progname, optopt); + usage(); + case 'C': + Cflag = 1; + break; + case '?': + fprintf(stderr, "%s: bad flag -%c\n", + progname, optopt); + /*FALLTHRU*/ + 
default: + usage(); + } + } + if (newopt == 0 && ((optind>=argc-2 && argc &&argv[argc-1][0] == '+') || +#ifndef SUS + (optind>=argc-2 && argc && +#else /* SUS */ + (optind == argc-1 && +#endif /* SUS */ + digitchar(argv[argc-1][0] & 0377))) && + setoffset(argv[argc-1]) >= 0) { + argc--; + argv[argc] = NULL; + } + setfiles(argc ? &argv[optind] : &argv[0]); + if (types == NULL) + settype("oS"); + align(); + if (skip > 0) + doskip(); + od(); + return errcnt; +} diff --git a/patch/backupfile.c b/patch/backupfile.c @@ -0,0 +1,246 @@ +/*- + * Copyright (C) 1990 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * without restriction. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * backupfile.c -- make Emacs style backup file names + * + * David MacKenzie <djm@ai.mit.edu>. Some algorithms adapted from GNU Emacs. + * + * $OpenBSD: backupfile.c,v 1.20 2009/10/27 23:59:41 deraadt Exp $ + * $FreeBSD$ + */ + +#include <ctype.h> +#include <dirent.h> +#include <libgen.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "backupfile.h" + + +#define ISDIGIT(c) (isascii ((unsigned char)c) && isdigit ((unsigned char)c)) + +/* Which type of backup file names are generated. */ +enum backup_type backup_type = none; + +/* + * The extension added to file names to produce a simple (as opposed to + * numbered) backup file name. 
+ */ +const char *simple_backup_suffix = "~"; + +static char *concat(const char *, const char *); +static char *make_version_name(const char *, int); +static int max_backup_version(const char *, const char *); +static int version_number(const char *, const char *, size_t); +static int argmatch(const char *, const char **); +static void invalid_arg(const char *, const char *, int); + +/* + * Return the name of the new backup file for file FILE, allocated with + * malloc. Return 0 if out of memory. FILE must not end with a '/' unless it + * is the root directory. Do not call this function if backup_type == none. + */ +char * +find_backup_file_name(const char *file) +{ + char *dir, *base_versions, *tmp_file; + int highest_backup; + + if (backup_type == simple) + return concat(file, simple_backup_suffix); + tmp_file = strdup(file); + if (tmp_file == NULL) + return NULL; + base_versions = concat(basename(tmp_file), ".~"); + free(tmp_file); + if (base_versions == NULL) + return NULL; + tmp_file = strdup(file); + if (tmp_file == NULL) { + free(base_versions); + return NULL; + } + dir = dirname(tmp_file); + if (dir == NULL) { + free(base_versions); + free(tmp_file); + return NULL; + } + highest_backup = max_backup_version(base_versions, dir); + free(base_versions); + free(tmp_file); + if (backup_type == numbered_existing && highest_backup == 0) + return concat(file, simple_backup_suffix); + return make_version_name(file, highest_backup + 1); +} + +/* + * Return the number of the highest-numbered backup file for file FILE in + * directory DIR. If there are no numbered backups of FILE in DIR, or an + * error occurs reading DIR, return 0. FILE should already have ".~" appended + * to it. 
/*
 * If BACKUP is a numbered backup of BASE, return its version number;
 * otherwise return 0.  BASE_LENGTH is the length of BASE.  BASE should
 * already have ".~" appended to it.
 *
 * BACKUP comes from a directory listing and may carry an arbitrarily
 * long run of digits.  Versions with more than nine significant digits
 * are treated as "not a backup", which keeps both the accumulation here
 * and the caller's "highest + 1" within the range of a 32-bit int.
 */
static int
version_number(const char *base, const char *backup, size_t base_length)
{
	enum { MAX_SIGNIFICANT = 9 };
	const char	*p;
	int	version = 0;
	int	significant = 0;
	int	seen = 0;

	if (strncmp(base, backup, base_length) != 0)
		return 0;
	/* only the ASCII digits count, whatever the locale */
	for (p = backup + base_length; *p >= '0' && *p <= '9'; p++) {
		seen = 1;
		if (version != 0 || *p != '0') {
			if (++significant > MAX_SIGNIFICANT)
				return 0;
		}
		version = version * 10 + (*p - '0');
	}
	/* the digits must be followed by exactly one '~' */
	if (!seen || p[0] != '~' || p[1] != '\0')
		return 0;
	return version;
}

/*
 * Return the number of the highest-numbered backup file for file FILE in
 * directory DIR.  If there are no numbered backups of FILE in DIR, or an
 * error occurs reading DIR, return 0.  FILE should already have ".~" appended
 * to it.
 */
static int
max_backup_version(const char *file, const char *dir)
{
	DIR	*dirp;
	struct dirent	*dp;
	int	highest_version = 0, this_version;
	size_t	file_name_length;

	dirp = opendir(dir);
	if (dirp == NULL)
		return 0;

	file_name_length = strlen(file);

	while ((dp = readdir(dirp)) != NULL) {
		/* a backup name is strictly longer than its prefix */
		if (strlen(dp->d_name) <= file_name_length)
			continue;

		this_version = version_number(file, dp->d_name,
		    file_name_length);
		if (this_version > highest_version)
			highest_version = this_version;
	}
	closedir(dirp);
	return highest_version;
}

/*
 * Return a string, allocated with malloc, containing "FILE.~VERSION~".
 * Return 0 if out of memory.  Only ISO C library calls are used, so the
 * function does not depend on the asprintf() extension being declared.
 */
static char *
make_version_name(const char *file, int version)
{
	char	*backup_name;
	int	len;

	/* first pass only measures the result */
	len = snprintf(NULL, 0, "%s.~%d~", file, version);
	if (len < 0)
		return NULL;
	backup_name = malloc((size_t)len + 1);
	if (backup_name == NULL)
		return NULL;
	snprintf(backup_name, (size_t)len + 1, "%s.~%d~", file, version);
	return backup_name;
}

/*
 * Return the newly-allocated concatenation of STR1 and STR2.  If out of
 * memory, return 0.
 */
static char *
concat(const char *str1, const char *str2)
{
	size_t	len1 = strlen(str1);
	size_t	len2 = strlen(str2);
	char	*newstr;

	newstr = malloc(len1 + len2 + 1);
	if (newstr == NULL)
		return NULL;
	memcpy(newstr, str1, len1);
	memcpy(newstr + len1, str2, len2 + 1);	/* includes the NUL */
	return newstr;
}
+ */ +static int +argmatch(const char *arg, const char **optlist) +{ + int i; /* Temporary index in OPTLIST. */ + size_t arglen; /* Length of ARG. */ + int matchind = -1; /* Index of first nonexact match. */ + int ambiguous = 0; /* If nonzero, multiple nonexact match(es). */ + + arglen = strlen(arg); + + /* Test all elements for either exact match or abbreviated matches. */ + for (i = 0; optlist[i]; i++) { + if (!strncmp(optlist[i], arg, arglen)) { + if (strlen(optlist[i]) == arglen) + /* Exact match found. */ + return i; + else if (matchind == -1) + /* First nonexact match found. */ + matchind = i; + else + /* Second nonexact match found. */ + ambiguous = 1; + } + } + if (ambiguous) + return -2; + else + return matchind; +} + +/* + * Error reporting for argmatch. KIND is a description of the type of entity + * that was being matched. VALUE is the invalid value that was given. PROBLEM + * is the return value from argmatch. + */ +static void +invalid_arg(const char *kind, const char *value, int problem) +{ + fprintf(stderr, "patch: "); + if (problem == -1) + fprintf(stderr, "invalid"); + else /* Assume -2. */ + fprintf(stderr, "ambiguous"); + fprintf(stderr, " %s `%s'\n", kind, value); +} + +static const char *backup_args[] = { + "never", "simple", "nil", "existing", "t", "numbered", 0 +}; + +static enum backup_type backup_types[] = { + simple, simple, numbered_existing, + numbered_existing, numbered, numbered +}; + +/* + * Return the type of backup indicated by VERSION. Unique abbreviations are + * accepted. + */ +enum backup_type +get_version(const char *version) +{ + int i; + + if (version == NULL || *version == '\0') + return numbered_existing; + i = argmatch(version, backup_args); + if (i >= 0) + return backup_types[i]; + invalid_arg("version control type", version, i); + exit(2); +} diff --git a/patch/backupfile.h b/patch/backupfile.h @@ -0,0 +1,39 @@ +/*- + * Copyright (C) 1990 Free Software Foundation, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * without restriction. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * backupfile.h -- declarations for making Emacs style backup file names + * + * $OpenBSD: backupfile.h,v 1.6 2003/07/28 18:35:36 otto Exp $ + * $FreeBSD$ + */ + +/* When to make backup files. */ +enum backup_type { + /* Never make backups. */ + none, + + /* Make simple backups of every file. */ + simple, + + /* + * Make numbered backups of files that already have numbered backups, + * and simple backups of the others. + */ + numbered_existing, + + /* Make numbered backups of every file. */ + numbered +}; + +extern enum backup_type backup_type; +extern const char *simple_backup_suffix; + +char *find_backup_file_name(const char *file); +enum backup_type get_version(const char *version); diff --git a/patch/common.h b/patch/common.h @@ -0,0 +1,119 @@ +/*- + * Copyright 1986, Larry Wall + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following condition is met: + * 1. Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER