hbase

heirloom base
git clone git://git.2f30.org/hbase
Log | Files | Refs | README

commit b309b9691d69616afdd09c3b125160ee4c231cba
parent dbb488e76401a5b5c4d42d7b64139f7334748bd5
Author: Daniel Bainton <dpb@driftaway.org>
Date:   Tue, 17 Jun 2014 23:13:45 +0300

Revert "Add sed from obase and remove heirloom sed"

This reverts commit a593d6de7f5774effda8ce9ea5c8cfcc3bed51b2.

The obase sed seems buggy, so reverting back to heirloom sed for now.

Diffstat:
D sed/POSIX | 199 -------------------------------------------------------------------------------
D sed/TEST/hanoi.sed | 103 -------------------------------------------------------------------------------
D sed/TEST/math.sed | 164 -------------------------------------------------------------------------------
D sed/TEST/sed.test | 549 -------------------------------------------------------------------------------
D sed/compile.c | 858 -------------------------------------------------------------------------------
D sed/defs.h | 148 -------------------------------------------------------------------------------
D sed/extern.h | 56 --------------------------------------------------------
D sed/main.c | 356 -------------------------------------------------------------------------------
D sed/misc.c | 113 -------------------------------------------------------------------------------
M sed/mkfile | 7 +++----
D sed/process.c | 615 -------------------------------------------------------------------------------
M sed/sed.1 | 883 +++++++++++++++++++++++++++++++------------------------------------------------
A sed/sed.h | 191 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A sed/sed0.c | 1266 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A sed/sed1.c | 917 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A sed/version.c | 22 ++++++++++++++++++++++
16 files changed, 2749 insertions(+), 3698 deletions(-)

diff --git a/sed/POSIX b/sed/POSIX @@ -1,199 +0,0 @@ -# $OpenBSD: POSIX,v 1.2 1996/06/26 05:39:04 deraadt Exp $ -# from: @(#)POSIX 8.1 (Berkeley) 6/6/93 - -Comments on the IEEE P1003.2 Draft 12 - Part 2: Shell and Utilities - Section 4.55: sed - Stream editor - -Diomidis Spinellis <dds@doc.ic.ac.uk> -Keith Bostic <bostic@cs.berkeley.edu> - -In the following paragraphs, "wrong" usually means "inconsistent with -historic practice", as most of the following comments refer to -undocumented inconsistencies between the historical versions of sed and -the POSIX 1003.2 standard. All the comments are notes taken while -implementing a POSIX-compatible version of sed, and should not be -interpreted as official opinions or criticism towards the POSIX committee. -All uses of "POSIX" refer to section 4.55, Draft 12 of POSIX 1003.2. - - 1. 32V and BSD derived implementations of sed strip the text - arguments of the a, c and i commands of their initial blanks, - i.e. - - #!/bin/sed -f - a\ - foo\ - \ indent\ - bar - - produces: - - foo - indent - bar - - POSIX does not specify this behavior as the System V versions of - sed do not do this stripping. The argument against stripping is - that it is difficult to write sed scripts that have leading blanks - if they are stripped. The argument for stripping is that it is - difficult to write readable sed scripts unless indentation is allowed - and ignored, and leading whitespace is obtainable by entering a - backslash in front of it. This implementation follows the BSD - historic practice. - - 2. Historical versions of sed required that the w flag be the last - flag to an s command as it takes an additional argument. This - is obvious, but not specified in POSIX. - - 3. Historical versions of sed required that whitespace follow a w - flag to an s command. This is not specified in POSIX. This - implementation permits whitespace but does not require it. - - 4. 
Historical versions of sed permitted any number of whitespace - characters to follow the w command. This is not specified in - POSIX. This implementation permits whitespace but does not - require it. - - 5. The rule for the l command differs from historic practice. Table - 2-15 includes the various ANSI C escape sequences, including \\ - for backslash. Some historical versions of sed displayed two - digit octal numbers, too, not three as specified by POSIX. POSIX - is a cleanup, and is followed by this implementation. - - 6. The POSIX specification for ! does not specify that for a single - command the command must not contain an address specification - whereas the command list can contain address specifications. The - specification for ! implies that "3!/hello/p" works, and it never - has, historically. Note, - - 3!{ - /hello/p - } - - does work. - - 7. POSIX does not specify what happens with consecutive ! commands - (e.g. /foo/!!!p). Historic implementations allow any number of - !'s without changing the behaviour. (It seems logical that each - one might reverse the behaviour.) This implementation follows - historic practice. - - 8. Historic versions of sed permitted commands to be separated - by semi-colons, e.g. 'sed -ne '1p;2p;3q' printed the first - three lines of a file. This is not specified by POSIX. - Note, the ; command separator is not allowed for the commands - a, c, i, w, r, :, b, t, # and at the end of a w flag in the s - command. This implementation follows historic practice and - implements the ; separator. - - 9. Historic versions of sed terminated the script if EOF was reached - during the execution of the 'n' command, i.e.: - - sed -e ' - n - i\ - hello - ' </dev/null - - did not produce any output. POSIX does not specify this behavior. - This implementation follows historic practice. - -10. Deleted. - -11. 
Historical implementations do not output the change text of a c - command in the case of an address range whose first line number - is greater than the second (e.g. 3,1). POSIX requires that the - text be output. Since the historic behavior doesn't seem to have - any particular purpose, this implementation follows the POSIX - behavior. - -12. POSIX does not specify whether address ranges are checked and - reset if a command is not executed due to a jump. The following - program will behave in different ways depending on whether the - 'c' command is triggered at the third line, i.e. will the text - be output even though line 3 of the input will never logically - encounter that command. - - 2,4b - 1,3c\ - text - - Historic implementations, and this implementation, do not output - the text in the above example. The general rule, therefore, - is that a range whose second address is never matched extends to - the end of the input. - -13. Historical implementations allow an output suppressing #n at the - beginning of -e arguments as well as in a script file. POSIX - does not specify this. This implementation follows historical - practice. - -14. POSIX does not explicitly specify how sed behaves if no script is - specified. Since the sed Synopsis permits this form of the command, - and the language in the Description section states that the input - is output, it seems reasonable that it behave like the cat(1) - command. Historic sed implementations behave differently for "ls | - sed", where they produce no output, and "ls | sed -e#", where they - behave like cat. This implementation behaves like cat in both cases. - -15. The POSIX requirement to open all w files at the beginning makes - sed behave nonintuitively when the w commands are preceded by - addresses or are within conditional blocks. This implementation - follows historic practice and POSIX, by default, and provides the - -a option which opens the files only when they are needed. - -16. 
POSIX does not specify how escape sequences other than \n and \D - (where D is the delimiter character) are to be treated. This is - reasonable, however, it also doesn't state that the backslash is - to be discarded from the output regardless. A strict reading of - POSIX would be that "echo xyz | sed s/./\a" would display "\ayz". - As historic sed implementations always discarded the backslash, - this implementation does as well. - -17. POSIX specifies that an address can be "empty". This implies - that constructs like ",d" or "1,d" and ",5d" are allowed. This - is not true for historic implementations or this implementation - of sed. - -18. The b t and : commands are documented in POSIX to ignore leading - white space, but no mention is made of trailing white space. - Historic implementations of sed assigned different locations to - the labels "x" and "x ". This is not useful, and leads to subtle - programming errors, but it is historic practice and changing it - could theoretically break working scripts. This implementation - follows historic practice. - -19. Although POSIX specifies that reading from files that do not exist - from within the script must not terminate the script, it does not - specify what happens if a write command fails. Historic practice - is to fail immediately if the file cannot be opened or written. - This implementation follows historic practice. - -20. Historic practice is that the \n construct can be used for either - string1 or string2 of the y command. This is not specified by - POSIX. This implementation follows historic practice. - -21. Deleted. - -22. Historic implementations of sed ignore the RE delimiter characters - within character classes. This is not specified in POSIX. This - implementation follows historic practice. - -23. Historic implementations handle empty RE's in a special way: the - empty RE is interpreted as if it were the last RE encountered, - whether in an address or elsewhere. 
POSIX does not document this - behavior. For example the command: - - sed -e /abc/s//XXX/ - - substitutes XXX for the pattern abc. The semantics of "the last - RE" can be defined in two different ways: - - 1. The last RE encountered when compiling (lexical/static scope). - 2. The last RE encountered while running (dynamic scope). - - While many historical implementations fail on programs depending - on scope differences, the SunOS version exhibited dynamic scope - behaviour. This implementation does dynamic scoping, as this seems - the most useful and in order to remain consistent with historical - practice. diff --git a/sed/TEST/hanoi.sed b/sed/TEST/hanoi.sed @@ -1,103 +0,0 @@ -# $OpenBSD: hanoi.sed,v 1.2 1996/06/26 05:39:09 deraadt Exp $ -# Towers of Hanoi in sed. -# -# from: @(#)hanoi.sed 8.1 (Berkeley) 6/6/93 -# -# -# Ex: -# Run "sed -f hanoi.sed", and enter: -# -# :abcd: : :<CR><CR> -# -# note -- TWO carriage returns, a peculiarity of sed), this will output the -# sequence of states involved in moving 4 rings, the largest called "a" and -# the smallest called "d", from the first to the second of three towers, so -# that the rings on any tower at any time are in descending order of size. -# You can start with a different arrangement and a different number of rings, -# say :ce:b:ax: and it will give the shortest procedure for moving them all -# to the middle tower. The rules are: the names of the rings must all be -# lower-case letters, they must be input within 3 fields (representing the -# towers) and delimited by 4 colons, such that the letters within each field -# are in alphabetical order (i.e. rings are in descending order of size). -# -# For the benefit of anyone who wants to figure out the script, an "internal" -# line of the form -# b:0abx:1a2b3 :2 :3x2 -# has the following meaning: the material after the three markers :1, :2, -# and :3 represents the three towers; in this case the current set-up is -# ":ab : :x :". 
The numbers after a, b and x in these fields indicate -# that the next time it gets a chance, it will move a to tower 2, move b -# to tower 3, and move x to tower 2. The string after :0 just keeps track -# of the alphabetical order of the names of the rings. The b at the -# beginning means that it is now dealing with ring b (either about to move -# it, or re-evaluating where it should next be moved to). -# -# Although this version is "limited" to 26 rings because of the size of the -# alphabet, one could write a script using the same idea in which the rings -# were represented by arbitrary [strings][within][brackets], and in place of -# the built-in line of the script giving the order of the letters of the -# alphabet, it would accept from the user a line giving the ordering to be -# assumed, e.g. [ucbvax][decvax][hplabs][foo][bar]. -# -# George Bergman -# Math, UC Berkeley 94720 USA - -# cleaning, diagnostics -s/ *//g -/^$/d -/[^a-z:]/{a\ -Illegal characters: use only a-z and ":". Try again. -d -} -/^:[a-z]*:[a-z]*:[a-z]*:$/!{a\ -Incorrect format: use\ -\ : string1 : string2 : string3 :<CR><CR>\ -Try again. -d -} -/\([a-z]\).*\1/{a\ -Repeated letters not allowed. Try again. 
-d -} -# initial formatting -h -s/[a-z]/ /g -G -s/^:\( *\):\( *\):\( *\):\n:\([a-z]*\):\([a-z]*\):\([a-z]*\):$/:1\4\2\3:2\5\1\3:3\6\1\2:0/ -s/[a-z]/&2/g -s/^/abcdefghijklmnopqrstuvwxyz/ -:a -s/^\(.\).*\1.*/&\1/ -s/.// -/^[^:]/ba -s/\([^0]*\)\(:0.*\)/\2\1:/ -s/^[^0]*0\(.\)/\1&/ -:b -# outputting current state without markers -h -s/.*:1/:/ -s/[123]//gp -g -:c -# establishing destinations -/^\(.\).*\1:1/td -/^\(.\).*:1[^:]*\11/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\31/ -/^\(.\).*:1[^:]*\12/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\33/ -/^\(.\).*:1[^:]*\13/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\32/ -/^\(.\).*:2[^:]*\11/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\33/ -/^\(.\).*:2[^:]*\12/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\32/ -/^\(.\).*:2[^:]*\13/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\31/ -/^\(.\).*:3[^:]*\11/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\32/ -/^\(.\).*:3[^:]*\12/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\31/ -/^\(.\).*:3[^:]*\13/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\33/ -bc -# iterate back to find smallest out-of-place ring -:d -s/^\(.\)\(:0[^:]*\([^:]\)\1.*:\([123]\)[^:]*\1\)\4/\3\2\4/ -td -# move said ring (right, resp. left) -s/^\(.\)\(.*\)\1\([23]\)\(.*:\3[^ ]*\) /\1\2 \4\1\3/ -s/^\(.\)\(.*:\([12]\)[^ ]*\) \(.*\)\1\3/\1\2\1\3\4 / -tb -s/.*/Done! Try another, or end with ^D./p -d diff --git a/sed/TEST/math.sed b/sed/TEST/math.sed @@ -1,164 +0,0 @@ -# $OpenBSD: math.sed,v 1.2 1996/06/26 05:39:10 deraadt Exp $ -# -# from: @(#)math.sed 8.1 (Berkeley) 6/6/93 -# -# Addition and multiplication in sed. -# ++ for a limited time only do (expr) too!!! -# -# Kevin S Braunsdorf, PUCC UNIX Group, ksb@cc.purdue.edu. 
-# -# Ex: -# echo "4+7*3" | sed -f %f - -# make sure the expression is well formed -s/[ ]//g -/[+*\/-]$/{ - a\ - poorly formed expression, operator on the end - q -} -/^[+*\/]/{ - a\ - poorly formed expression, leading operator - q -} - -# fill hold space with done token -x -s/^.*/done/ -x - -# main loop, process operators (*, + and () ) -: loop -/^\+/{ - s/// - b loop -} -/^\(.*\)(\([^)]*\))\(.*\)$/{ - H - s//\2/ - x - s/^\(.*\)\n\(.*\)(\([^()]*\))\(.*\)$/()\2@\4@\1/ - x - b loop -} -/^[0-9]*\*/b mul -/^\([0-9]*\)\+\([0-9+*]*\*[0-9]*\)$/{ - s//\2+\1/ - b loop -} -/^[0-9]*\+/{ - s/$/=/ - b add -} -x -/^done$/{ - x - p - d -} -/^()/{ - s/// - x - G - s/\(.*\)\n\([^@]*\)@\([^@]*\)@\(.*\)/\2\1\3/ - x - s/[^@]*@[^@]*@\(.*\)/\1/ - x - b loop -} -i\ -help, stack problem -p -x -p -q - -# turn mul into add until 1*x -> x -: mul -/^0*1\*/{ - s/// - b loop -} -/^\([0-9]*\)0\*/{ - s/^\([0-9]*\)0\*\([0-9]*\)/\1*\20/ - b mul -} -s/^\([0-9]*\)1\*/\10*/ -s/^\([0-9]*\)2\*/\11*/ -s/^\([0-9]*\)3\*/\12*/ -s/^\([0-9]*\)4\*/\13*/ -s/^\([0-9]*\)5\*/\14*/ -s/^\([0-9]*\)6\*/\15*/ -s/^\([0-9]*\)7\*/\16*/ -s/^\([0-9]*\)8\*/\17*/ -s/^\([0-9]*\)9\*/\18*/ -s/\*\([0-9*]*\)/*\1+\1/ -b mul - -# get rid of a plus term until 0+x -> x -: add -/^\+\([0-9+*]*\)=/{ - s//\1/ - b loop -} -/^\([0-9*]*\)\+=/{ - s//\1/ - b loop -} -/^\([0-9]*\)\+\([0-9*+]*\)\+=/{ - s//\2+\1/ - b loop -} -/^\([0-9]*\)0\+\([0-9]*\)\([0-9]\)=/{ - s//\1+\2=\3/ - b add -} -/^\([0-9]*\)\([0-9]\)\+\([0-9]*\)0=/{ - s//\1+\3=\2/ - b add -} -/^\([0-9]*\)0\+\([0-9*+]*\)\+\([0-9]*\)\([0-9]\)=/{ - s//\1+\2+\3=\4/ - b add -} -/^\([0-9]*\)\([0-9]\)\+\([0-9*+]*\)\+\([0-9]*\)0=/{ - s//\1+\3+\4=\2/ - b add -} -s/^\([0-9]*\)1\+/\10+/ -s/^\([0-9]*\)2\+/\11+/ -s/^\([0-9]*\)3\+/\12+/ -s/^\([0-9]*\)4\+/\13+/ -s/^\([0-9]*\)5\+/\14+/ -s/^\([0-9]*\)6\+/\15+/ -s/^\([0-9]*\)7\+/\16+/ -s/^\([0-9]*\)8\+/\17+/ -s/^\([0-9]*\)9\+/\18+/ - -s/9=\([0-9]*\)$/_=\1/ -s/8=\([0-9]*\)$/9=\1/ -s/7=\([0-9]*\)$/8=\1/ -s/6=\([0-9]*\)$/7=\1/ -s/5=\([0-9]*\)$/6=\1/ 
-s/4=\([0-9]*\)$/5=\1/ -s/3=\([0-9]*\)$/4=\1/ -s/2=\([0-9]*\)$/3=\1/ -s/1=\([0-9]*\)$/2=\1/ -/_/{ - s//_0/ - : inc - s/9_/_0/ - s/8_/9/ - s/7_/8/ - s/6_/7/ - s/5_/6/ - s/4_/5/ - s/3_/4/ - s/2_/3/ - s/1_/2/ - s/0_/1/ - s/\+_/+1/ - /_/b inc -} -b add diff --git a/sed/TEST/sed.test b/sed/TEST/sed.test @@ -1,549 +0,0 @@ -#!/bin/sh - -# $OpenBSD: sed.test,v 1.4 2008/10/07 15:02:45 millert Exp $ -# -# Copyright (c) 1992 Diomidis Spinellis. -# Copyright (c) 1992, 1993 -# The Regents of the University of California. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. Neither the name of the University nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. -# -# from: @(#)sed.test 8.1 (Berkeley) 6/6/93 -# - -# sed Regression Tests -# -# The following files are created: -# lines[1-4], script1, script2 -# Two directories *.out contain the test results - -main() -{ - BASE=/usr/bin/sed - BASELOG=sed.out - TEST=../obj/sed - TESTLOG=nsed.out - DICT=/usr/share/dict/words - - test_error | more - - awk 'END { for (i = 1; i < 15; i++) print "l1_" i}' </dev/null >lines1 - awk 'END { for (i = 1; i < 10; i++) print "l2_" i}' </dev/null >lines2 - - exec 4>&1 5>&2 - - # Set these flags to get messages about known problems - BSD=0 - GNU=0 - SUN=0 - tests $BASE $BASELOG - - BSD=0 - GNU=0 - SUN=0 - tests $TEST $TESTLOG - exec 1>&4 2>&5 - diff -c $BASELOG $TESTLOG | more -} - -tests() -{ - SED=$1 - DIR=$2 - rm -rf $DIR - mkdir $DIR - MARK=100 - - test_args - test_addr - echo Testing commands - test_group - test_acid - test_branch - test_pattern - test_print - test_subst -} - -mark() -{ - MARK=`expr $MARK + 1` - exec 1>&4 2>&5 - exec >"$DIR/${MARK}_$1" - echo "Test $1:$MARK" - # Uncomment this line to match tests with sed error messages - echo "Test $1:$MARK" >&5 -} - -test_args() -{ - mark '1.1' - echo Testing argument parsing - echo First type - if [ $SUN -eq 1 ] ; then - echo SunOS sed prints only with -n - else - $SED 's/^/e1_/p' lines1 - fi - mark '1.2' ; $SED -n 's/^/e1_/p' lines1 - mark '1.3' - if [ $SUN -eq 1 ] ; then - echo SunOS sed prints only with -n - else - $SED 's/^/e1_/p' <lines1 - fi - mark '1.4' ; $SED 
-n 's/^/e1_/p' <lines1 - echo Second type - mark '1.4.1' - if [ $SUN -eq 1 ] ; then - echo SunOS sed fails this - fi - $SED -e '' <lines1 - echo 's/^/s1_/p' >script1 - echo 's/^/s2_/p' >script2 - mark '1.5' - if [ $SUN -eq 1 ] ; then - echo SunOS sed prints only with -n - else - $SED -f script1 lines1 - fi - mark '1.6' - if [ $SUN -eq 1 ] ; then - echo SunOS sed prints only with -n - else - $SED -f script1 <lines1 - fi - mark '1.7' - if [ $SUN -eq 1 ] ; then - echo SunOS sed prints only with -n - else - $SED -e 's/^/e1_/p' lines1 - fi - mark '1.8' - if [ $SUN -eq 1 ] ; then - echo SunOS sed prints only with -n - else - $SED -e 's/^/e1_/p' <lines1 - fi - mark '1.9' ; $SED -n -f script1 lines1 - mark '1.10' ; $SED -n -f script1 <lines1 - mark '1.11' ; $SED -n -e 's/^/e1_/p' lines1 - mark '1.12' - if [ $SUN -eq 1 ] ; then - echo SunOS sed prints only with -n - else - $SED -n -e 's/^/e1_/p' <lines1 - fi - mark '1.13' - if [ $SUN -eq 1 ] ; then - echo SunOS sed prints only with -n - else - $SED -e 's/^/e1_/p' -e 's/^/e2_/p' lines1 - fi - mark '1.14' - if [ $SUN -eq 1 ] ; then - echo SunOS sed prints only with -n - else - $SED -f script1 -f script2 lines1 - fi - mark '1.15' - if [ $GNU -eq 1 -o $SUN -eq 1 ] ; then - echo GNU and SunOS sed fail this following older POSIX draft - else - $SED -e 's/^/e1_/p' -f script1 lines1 - fi - mark '1.16' - if [ $SUN -eq 1 ] ; then - echo SunOS sed prints only with -n - else - $SED -e 's/^/e1_/p' lines1 lines1 - fi - # POSIX D11.2:11251 - mark '1.17' ; $SED p <lines1 lines1 -cat >script1 <<EOF -#n -# A comment - -p -EOF - mark '1.18' ; $SED -f script1 <lines1 lines1 -} - -test_addr() -{ - echo Testing address ranges - mark '2.1' ; $SED -n -e '4p' lines1 - mark '2.2' ; $SED -n -e '20p' lines1 lines2 - mark '2.3' ; $SED -n -e '$p' lines1 - mark '2.4' ; $SED -n -e '$p' lines1 lines2 - mark '2.5' ; $SED -n -e '$a\ -hello' /dev/null - mark '2.6' ; $SED -n -e '$p' lines1 /dev/null lines2 - # Should not print anything - mark '2.7' ; $SED -n 
-e '20p' lines1 - mark '2.8' ; $SED -n -e '0p' lines1 - mark '2.9' ; $SED -n '/l1_7/p' lines1 - mark '2.10' ; $SED -n ' /l1_7/ p' lines1 - mark '2.11' - if [ $BSD -eq 1 ] ; then - echo BSD sed fails this test - fi - if [ $GNU -eq 1 ] ; then - echo GNU sed fails this - fi - $SED -n '\_l1\_7_p' lines1 - mark '2.12' ; $SED -n '1,4p' lines1 - mark '2.13' ; $SED -n '1,$p' lines1 lines2 - mark '2.14' ; $SED -n '1,/l2_9/p' lines1 lines2 - mark '2.15' ; $SED -n '/4/,$p' lines1 lines2 - mark '2.16' ; $SED -n '/4/,20p' lines1 lines2 - mark '2.17' ; $SED -n '/4/,/10/p' lines1 lines2 - mark '2.18' ; $SED -n '/l2_3/,/l1_8/p' lines1 lines2 - mark '2.19' - if [ $GNU -eq 1 ] ; then - echo GNU sed fails this - fi - $SED -n '12,3p' lines1 lines2 - mark '2.20' - if [ $GNU -eq 1 ] ; then - echo GNU sed fails this - fi - $SED -n '/l1_7/,3p' lines1 lines2 -} - -test_group() -{ - echo Brace and other grouping - mark '3.1' ; $SED -e ' -4,12 { - s/^/^/ - s/$/$/ - s/_/T/ -}' lines1 - mark '3.2' ; $SED -e ' -4,12 { - s/^/^/ - /6/,/10/ { - s/$/$/ - /8/ s/_/T/ - } -}' lines1 - mark '3.3' ; $SED -e ' -4,12 !{ - s/^/^/ - /6/,/10/ !{ - s/$/$/ - /8/ !s/_/T/ - } -}' lines1 - mark '3.4' ; $SED -e '4,12!s/^/^/' lines1 -} - -test_acid() -{ - echo Testing a c d and i commands - mark '4.1' ; $SED -n -e ' -s/^/before_i/p -20i\ -inserted -s/^/after_i/p -' lines1 lines2 - mark '4.2' ; $SED -n -e ' -5,12s/^/5-12/ -s/^/before_a/p -/5-12/a\ -appended -s/^/after_a/p -' lines1 lines2 - mark '4.3' - if [ $GNU -eq 1 ] ; then - echo GNU sed fails this - fi - $SED -n -e ' -s/^/^/p -/l1_/a\ -appended -8,10N -s/$/$/p -' lines1 lines2 - mark '4.4' ; $SED -n -e ' -c\ -hello -' lines1 - mark '4.5' ; $SED -n -e ' -8c\ -hello -' lines1 - mark '4.6' ; $SED -n -e ' -3,14c\ -hello -' lines1 -# SunOS and GNU sed behave differently. 
We follow POSIX -# mark '4.7' ; $SED -n -e ' -#8,3c\ -#hello -#' lines1 - mark '4.8' ; $SED d <lines1 -} - -test_branch() -{ - echo Testing labels and branching - mark '5.1' ; $SED -n -e ' -b label4 -:label3 -s/^/label3_/p -b end -:label4 -2,12b label1 -b label2 -:label1 -s/^/label1_/p -b -:label2 -s/^/label2_/p -b label3 -:end -' lines1 - mark '5.2' - if [ $BSD -eq 1 ] ; then - echo BSD sed fails this test - fi - $SED -n -e ' -s/l1_/l2_/ -t ok -b -:ok -s/^/tested /p -' lines1 lines2 -# SunOS sed behaves differently here. Clarification needed. -# mark '5.3' ; $SED -n -e ' -#5,8b inside -#1,5 { -# s/^/^/p -# :inside -# s/$/$/p -#} -#' lines1 -# Check that t clears the substitution done flag - mark '5.4' ; $SED -n -e ' -1,8s/^/^/ -t l1 -:l1 -t l2 -s/$/$/p -b -:l2 -s/^/ERROR/ -' lines1 -# Check that reading a line clears the substitution done flag - mark '5.5' - if [ $BSD -eq 1 ] ; then - echo BSD sed fails this test - fi - $SED -n -e ' -t l2 -1,8s/^/^/p -2,7N -b -:l2 -s/^/ERROR/p -' lines1 - mark '5.6' ; $SED 5q lines1 - mark '5.7' ; $SED -e ' -5i\ -hello -5q' lines1 -# Branch across block boundary - mark '5.8' ; $SED -e ' -{ -:b -} -s/l/m/ -tb' lines1 -} - -test_pattern() -{ -echo Pattern space commands -# Check that the pattern space is deleted - mark '6.1' ; $SED -n -e ' -c\ -changed -p -' lines1 - mark '6.2' ; $SED -n -e ' -4d -p -' lines1 -# SunOS sed refused to print here -# mark '6.3' ; $SED -e ' -#N -#N -#N -#D -#P -#4p -#' lines1 - mark '6.4' ; $SED -e ' -2h -3H -4g -5G -6x -6p -6x -6p -' lines1 - mark '6.5' ; $SED -e '4n' lines1 - mark '6.6' ; $SED -n -e '4n' lines1 -} - -test_print() -{ - echo Testing print and file routines - awk 'END {for (i = 1; i < 256; i++) printf("%c", i);print "\n"}' \ - </dev/null >lines3 - # GNU and SunOS sed behave differently here - mark '7.1' - if [ $BSD -eq 1 ] ; then - echo 'BSD sed drops core on this one; TEST SKIPPED' - else - $SED -n l lines3 - fi - mark '7.2' ; $SED -e '/l2_/=' lines1 lines2 - rm -f lines4 - mark '7.3' ; 
$SED -e '3,12w lines4' lines1 - echo w results - cat lines4 - mark '7.4' ; $SED -e '4r lines2' lines1 - mark '7.5' ; $SED -e '5r /dev/dds' lines1 - mark '7.6' ; $SED -e '6r /dev/null' lines1 - mark '7.7' - if [ $BSD -eq 1 -o $GNU -eq 1 -o $SUN -eq 1 ] ; then - echo BSD, GNU and SunOS cannot pass this one - else - sed '200q' $DICT | sed 's$.*$s/^/&/w tmpdir/&$' >script1 - rm -rf tmpdir - mkdir tmpdir - $SED -f script1 lines1 - cat tmpdir/* - rm -rf tmpdir - fi - mark '7.8' - if [ $BSD -eq 1 ] ; then - echo BSD sed cannot pass 7.7 - else - echo line1 > lines3 - echo "" >> lines3 - $SED -n -e '$p' lines3 /dev/null - fi - -} - -test_subst() -{ - echo Testing substitution commands - mark '8.1' ; $SED -e 's/./X/g' lines1 - mark '8.2' ; $SED -e 's,.,X,g' lines1 -# GNU and SunOS sed thinks we are escaping . as wildcard, not as separator -# mark '8.3' ; $SED -e 's.\..X.g' lines1 -# POSIX does not say that this should work -# mark '8.4' ; $SED -e 's/[/]/Q/' lines1 - mark '8.4' ; $SED -e 's/[\/]/Q/' lines1 - mark '8.5' ; $SED -e 's_\__X_' lines1 - mark '8.6' ; $SED -e 's/./(&)/g' lines1 - mark '8.7' ; $SED -e 's/./(\&)/g' lines1 - mark '8.8' ; $SED -e 's/\(.\)\(.\)\(.\)/x\3x\2x\1/g' lines1 - mark '8.9' ; $SED -e 's/_/u0\ -u1\ -u2/g' lines1 - mark '8.10' - if [ $BSD -eq 1 -o $GNU -eq 1 ] ; then - echo 'BSD/GNU sed do not understand digit flags on s commands' - fi - $SED -e 's/./X/4' lines1 - rm -f lines4 - mark '8.11' ; $SED -e 's/1/X/w lines4' lines1 - echo s wfile results - cat lines4 - mark '8.12' ; $SED -e 's/[123]/X/g' lines1 - mark '8.13' ; $SED -e 'y/0123456789/9876543210/' lines1 - mark '8.14' ; - if [ $BSD -eq 1 -o $GNU -eq 1 -o $SUN -eq 1 ] ; then - echo BSD/GNU/SUN sed fail this test - else - $SED -e 'y10\123456789198765432\101' lines1 - fi - mark '8.15' ; $SED -e '1N;2y/\n/X/' lines1 - mark '8.16' - if [ $BSD -eq 1 ] ; then - echo 'BSD sed does not handle branch defined REs' - else - echo 'eeefff' | $SED -e 'p' -e 's/e/X/p' -e ':x' \ - -e 's//Y/p' -e '/f/bx' - fi 
-} - -test_error() -{ - exec 0>&3 4>&1 5>&2 - exec 0</dev/null - exec 2>&1 - set -x - $TEST -x && exit 1 - $TEST -f && exit 1 - $TEST -e && exit 1 - $TEST -f /dev/dds && exit 1 - $TEST p /dev/dds && exit 1 - $TEST -f /bin/sh && exit 1 - $TEST '{' && exit 1 - $TEST '{' && exit 1 - $TEST '/hello/' && exit 1 - $TEST '1,/hello/' && exit 1 - $TEST -e '-5p' && exit 1 - $TEST '/jj' && exit 1 - $TEST 'a hello' && exit 1 - $TEST 'a \ hello' && exit 1 - $TEST 'b foo' && exit 1 - $TEST 'd hello' && exit 1 - $TEST 's/aa' && exit 1 - $TEST 's/aa/' && exit 1 - $TEST 's/a/b' && exit 1 - $TEST 's/a/b/c/d' && exit 1 - $TEST 's/a/b/ 1 2' && exit 1 - $TEST 's/a/b/ 1 g' && exit 1 - $TEST 's/a/b/w' && exit 1 - $TEST 'y/aa' && exit 1 - $TEST 'y/aa/b/' && exit 1 - $TEST 'y/aa/' && exit 1 - $TEST 'y/a/b' && exit 1 - $TEST 'y/a/b/c/d' && exit 1 - $TEST '!' && exit 1 - $TEST supercalifrangolisticexprialidociussupercalifrangolisticexcius - set +x - exec 0>&3 1>&4 2>&5 -} - -main diff --git a/sed/compile.c b/sed/compile.c @@ -1,858 +0,0 @@ -/* $OpenBSD: compile.c,v 1.34 2010/11/15 20:26:00 millert Exp $ */ - -/*- - * Copyright (c) 1992 Diomidis Spinellis. - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Diomidis Spinellis of Imperial College, University of London. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/types.h> -#include <sys/stat.h> - -#include <ctype.h> -#include <errno.h> -#include <fcntl.h> -#include <limits.h> -#include <regex.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "defs.h" -#include "extern.h" - -#define LHSZ 128 -#define LHMASK (LHSZ - 1) -static struct labhash { - struct labhash *lh_next; - u_int lh_hash; - struct s_command *lh_cmd; - int lh_ref; -} *labels[LHSZ]; - -static char *compile_addr(char *, struct s_addr *); -static char *compile_ccl(char **, char *); -static char *compile_delimited(char *, char *, int); -static char *compile_flags(char *, struct s_subst *); -static char *compile_re(char *, regex_t **); -static char *compile_subst(char *, struct s_subst *); -static char *compile_text(void); -static char *compile_tr(char *, char **); -static struct s_command - **compile_stream(struct s_command **); -static char *duptoeol(char *, char *, char **); -static void enterlabel(struct s_command *); -static struct s_command - 
*findlabel(char *); -static void fixuplabel(struct s_command *, struct s_command *); -static void uselabel(void); - -/* - * Command specification. This is used to drive the command parser. - */ -struct s_format { - char code; /* Command code */ - int naddr; /* Number of address args */ - enum e_args args; /* Argument type */ -}; - -static struct s_format cmd_fmts[] = { - {'{', 2, GROUP}, - {'}', 0, ENDGROUP}, - {'a', 1, TEXT}, - {'b', 2, BRANCH}, - {'c', 2, TEXT}, - {'d', 2, EMPTY}, - {'D', 2, EMPTY}, - {'g', 2, EMPTY}, - {'G', 2, EMPTY}, - {'h', 2, EMPTY}, - {'H', 2, EMPTY}, - {'i', 1, TEXT}, - {'l', 2, EMPTY}, - {'n', 2, EMPTY}, - {'N', 2, EMPTY}, - {'p', 2, EMPTY}, - {'P', 2, EMPTY}, - {'q', 1, EMPTY}, - {'r', 1, RFILE}, - {'s', 2, SUBST}, - {'t', 2, BRANCH}, - {'w', 2, WFILE}, - {'x', 2, EMPTY}, - {'y', 2, TR}, - {'!', 2, NONSEL}, - {':', 0, LABEL}, - {'#', 0, COMMENT}, - {'=', 1, EMPTY}, - {'\0', 0, COMMENT}, -}; - -/* The compiled program. */ -struct s_command *prog; - -/* - * Compile the program into prog. - * Initialise appends. 
- */ -void -compile(void) -{ - *compile_stream(&prog) = NULL; - fixuplabel(prog, NULL); - uselabel(); - appends = xmalloc(sizeof(struct s_appends) * appendnum); - match = xmalloc((maxnsub + 1) * sizeof(regmatch_t)); -} - -#define EATSPACE() do { \ - if (p) \ - while (isascii(*p) && isspace(*p)) \ - p++; \ - } while (0) - -static struct s_command ** -compile_stream(struct s_command **link) -{ - char *p; - static char *lbuf; /* To avoid excessive malloc calls */ - static size_t bufsize; - struct s_command *cmd, *cmd2, *stack; - struct s_format *fp; - int naddr; /* Number of addresses */ - - stack = 0; - for (;;) { - if ((p = cu_fgets(&lbuf, &bufsize)) == NULL) { - if (stack != 0) - err(COMPILE, "unexpected EOF (pending }'s)"); - return (link); - } - -semicolon: EATSPACE(); - if (*p == '#' || *p == '\0') - continue; - if (*p == ';') { - p++; - goto semicolon; - } - *link = cmd = xmalloc(sizeof(struct s_command)); - link = &cmd->next; - cmd->nonsel = cmd->inrange = 0; - /* First parse the addresses */ - naddr = 0; - -/* Valid characters to start an address */ -#define addrchar(c) (strchr("0123456789/\\$", (c))) - if (addrchar(*p)) { - naddr++; - cmd->a1 = xmalloc(sizeof(struct s_addr)); - p = compile_addr(p, cmd->a1); - EATSPACE(); /* EXTENSION */ - if (*p == ',') { - p++; - EATSPACE(); /* EXTENSION */ - naddr++; - cmd->a2 = xmalloc(sizeof(struct s_addr)); - p = compile_addr(p, cmd->a2); - EATSPACE(); - } else { - cmd->a2 = 0; - } - } else { - cmd->a1 = cmd->a2 = 0; - } - -nonsel: /* Now parse the command */ - if (!*p) - err(COMPILE, "command expected"); - cmd->code = *p; - for (fp = cmd_fmts; fp->code; fp++) - if (fp->code == *p) - break; - if (!fp->code) - err(COMPILE, "invalid command code %c", *p); - if (naddr > fp->naddr) - err(COMPILE, - "command %c expects up to %d address(es), found %d", - *p, fp->naddr, naddr); - switch (fp->args) { - case NONSEL: /* ! */ - p++; - EATSPACE(); - cmd->nonsel = ! 
cmd->nonsel; - goto nonsel; - case GROUP: /* { */ - p++; - EATSPACE(); - cmd->next = stack; - stack = cmd; - link = &cmd->u.c; - if (*p) - goto semicolon; - break; - case ENDGROUP: - /* - * Short-circuit command processing, since end of - * group is really just a noop. - */ - cmd->nonsel = 1; - if (stack == 0) - err(COMPILE, "unexpected }"); - cmd2 = stack; - stack = cmd2->next; - cmd2->next = cmd; - /*FALLTHROUGH*/ - case EMPTY: /* d D g G h H l n N p P q x = \0 */ - p++; - EATSPACE(); - if (*p == ';') { - p++; - link = &cmd->next; - goto semicolon; - } - if (*p) - err(COMPILE, -"extra characters at the end of %c command", cmd->code); - break; - case TEXT: /* a c i */ - p++; - EATSPACE(); - if (*p != '\\') - err(COMPILE, "command %c expects \\ followed by" - " text", cmd->code); - p++; - EATSPACE(); - if (*p) - err(COMPILE, "extra characters after \\ at the" - " end of %c command", cmd->code); - cmd->t = compile_text(); - break; - case COMMENT: /* \0 # */ - break; - case WFILE: /* w */ - p++; - EATSPACE(); - if (*p == '\0') - err(COMPILE, "filename expected"); - cmd->t = duptoeol(p, "w command", NULL); - if (aflag) - cmd->u.fd = -1; - else if ((cmd->u.fd = open(p, - O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, - DEFFILEMODE)) == -1) - err(FATAL, "%s: %s", p, strerror(errno)); - break; - case RFILE: /* r */ - p++; - EATSPACE(); - cmd->t = duptoeol(p, "read command", NULL); - break; - case BRANCH: /* b t */ - p++; - EATSPACE(); - if (*p == '\0') - cmd->t = NULL; - else - cmd->t = duptoeol(p, "branch", &p); - if (*p == ';') { - p++; - goto semicolon; - } - break; - case LABEL: /* : */ - p++; - EATSPACE(); - cmd->t = duptoeol(p, "label", &p); - if (strlen(cmd->t) == 0) - err(COMPILE, "empty label"); - enterlabel(cmd); - if (*p == ';') { - p++; - goto semicolon; - } - break; - case SUBST: /* s */ - p++; - if (*p == '\0' || *p == '\\') - err(COMPILE, "substitute pattern can not be" - " delimited by newline or backslash"); - cmd->u.s = xmalloc(sizeof(struct s_subst)); - p = 
compile_re(p, &cmd->u.s->re); - if (p == NULL) - err(COMPILE, "unterminated substitute pattern"); - --p; - p = compile_subst(p, cmd->u.s); - p = compile_flags(p, cmd->u.s); - EATSPACE(); - if (*p == ';') { - p++; - link = &cmd->next; - goto semicolon; - } - break; - case TR: /* y */ - p++; - p = compile_tr(p, (char **)&cmd->u.y); - EATSPACE(); - if (*p == ';') { - p++; - link = &cmd->next; - goto semicolon; - } - if (*p) - err(COMPILE, "extra text at the end of a" - " transform command"); - break; - } - } -} - -/* - * Get a delimited string. P points to the delimeter of the string; d points - * to a buffer area. Newline and delimiter escapes are processed; other - * escapes are ignored. - * - * Returns a pointer to the first character after the final delimiter or NULL - * in the case of a non-terminated string. The character array d is filled - * with the processed string. - */ -static char * -compile_delimited(char *p, char *d, int is_tr) -{ - char c; - - c = *p++; - if (c == '\0') - return (NULL); - else if (c == '\\') - err(COMPILE, "\\ can not be used as a string delimiter"); - else if (c == '\n') - err(COMPILE, "newline can not be used as a string delimiter"); - while (*p) { - if (*p == '[' && *p != c) { - if ((d = compile_ccl(&p, d)) == NULL) - err(COMPILE, "unbalanced brackets ([])"); - continue; - } else if (*p == '\\' && p[1] == '[') { - *d++ = *p++; - } else if (*p == '\\' && p[1] == c) { - p++; - } else if (*p == '\\' && p[1] == 'n') { - *d++ = '\n'; - p += 2; - continue; - } else if (*p == '\\' && p[1] == '\\') { - if (is_tr) - p++; - else - *d++ = *p++; - } else if (*p == c) { - *d = '\0'; - return (p + 1); - } - *d++ = *p++; - } - return (NULL); -} - - -/* compile_ccl: expand a POSIX character class */ -static char * -compile_ccl(char **sp, char *t) -{ - int c, d; - char *s = *sp; - - *t++ = *s++; - if (*s == '^') - *t++ = *s++; - if (*s == ']') - *t++ = *s++; - for (; *s && (*t = *s) != ']'; s++, t++) - if (*s == '[' && ((d = *(s+1)) == '.' 
|| d == ':' || d == '=')) { - *++t = *++s, t++, s++; - for (c = *s; (*t = *s) != ']' || c != d; s++, t++) - if ((c = *s) == '\0') - return NULL; - } else if (*s == '\\' && s[1] == 'n') { - *t = '\n'; - s++; - } - if (*s == ']') { - *sp = ++s; - return (++t); - } else { - return (NULL); - } -} - -/* - * Get a regular expression. P points to the delimiter of the regular - * expression; repp points to the address of a regexp pointer. Newline - * and delimiter escapes are processed; other escapes are ignored. - * Returns a pointer to the first character after the final delimiter - * or NULL in the case of a non terminated regular expression. The regexp - * pointer is set to the compiled regular expression. - * Cflags are passed to regcomp. - */ -static char * -compile_re(char *p, regex_t **repp) -{ - int eval; - char *re; - - re = xmalloc(strlen(p) + 1); /* strlen(re) <= strlen(p) */ - p = compile_delimited(p, re, 0); - if (p && strlen(re) == 0) { - *repp = NULL; - free(re); - return (p); - } - *repp = xmalloc(sizeof(regex_t)); - if (p && (eval = regcomp(*repp, re, Eflag ? REG_EXTENDED : 0)) != 0) - err(COMPILE, "RE error: %s", strregerror(eval, *repp)); - if (maxnsub < (*repp)->re_nsub) - maxnsub = (*repp)->re_nsub; - free(re); - return (p); -} - -/* - * Compile the substitution string of a regular expression and set res to - * point to a saved copy of it. Nsub is the number of parenthesized regular - * expressions. 
- */ -static char * -compile_subst(char *p, struct s_subst *s) -{ - static char *lbuf; - static size_t bufsize; - int asize, ref, size; - char c, *text, *op, *sp; - int sawesc = 0; - - c = *p++; /* Terminator character */ - if (c == '\0') - return (NULL); - - s->maxbref = 0; - s->linenum = linenum; - text = NULL; - asize = size = 0; - do { - size_t len = ROUNDLEN(strlen(p) + 1); - if (asize - size < len) { - do { - asize += len; - } while (asize - size < len); - text = xrealloc(text, asize); - } - op = sp = text + size; - for (; *p; p++) { - if (*p == '\\' || sawesc) { - /* - * If this is a continuation from the last - * buffer, we won't have a character to - * skip over. - */ - if (sawesc) - sawesc = 0; - else - p++; - - if (*p == '\0') { - /* - * This escaped character is continued - * in the next part of the line. Note - * this fact, then cause the loop to - * exit w/ normal EOL case and reenter - * above with the new buffer. - */ - sawesc = 1; - p--; - continue; - } else if (strchr("123456789", *p) != NULL) { - *sp++ = '\\'; - ref = *p - '0'; - if (s->re != NULL && - ref > s->re->re_nsub) - err(COMPILE, -"\\%c not defined in the RE", *p); - if (s->maxbref < ref) - s->maxbref = ref; - } else if (*p == '&' || *p == '\\') - *sp++ = '\\'; - } else if (*p == c) { - p++; - *sp++ = '\0'; - size += sp - op; - s->new = xrealloc(text, size); - return (p); - } else if (*p == '\n') { - err(COMPILE, -"unescaped newline inside substitute pattern"); - /* NOTREACHED */ - } - *sp++ = *p; - } - size += sp - op; - } while ((p = cu_fgets(&lbuf, &bufsize))); - err(COMPILE, "unterminated substitute in regular expression"); - /* NOTREACHED */ -} - -/* - * Compile the flags of the s command - */ -static char * -compile_flags(char *p, struct s_subst *s) -{ - int gn; /* True if we have seen g or n */ - long l; - char wfile[PATH_MAX], *q; - - s->n = 1; /* Default */ - s->p = 0; - s->wfile = NULL; - s->wfd = -1; - for (gn = 0;;) { - EATSPACE(); /* EXTENSION */ - switch (*p) { - case 'g': 
- if (gn) - err(COMPILE, "more than one number or 'g' in" - " substitute flags"); - gn = 1; - s->n = 0; - break; - case '\0': - case '\n': - case ';': - return (p); - case 'p': - s->p = 1; - break; - case '1': case '2': case '3': - case '4': case '5': case '6': - case '7': case '8': case '9': - if (gn) - err(COMPILE, "more than one number or 'g' in" - " substitute flags"); - gn = 1; - l = strtol(p, &p, 10); - if (l <= 0 || l >= INT_MAX) - err(COMPILE, - "number in substitute flags out of range"); - s->n = (int)l; - continue; - case 'w': - p++; -#ifdef HISTORIC_PRACTICE - if (*p != ' ') { - err(WARNING, "space missing before w wfile"); - return (p); - } -#endif - EATSPACE(); - q = wfile; - while (*p) { - if (*p == '\n') - break; - *q++ = *p++; - } - *q = '\0'; - if (q == wfile) - err(COMPILE, "no wfile specified"); - s->wfile = strdup(wfile); - if (!aflag && (s->wfd = open(wfile, - O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, - DEFFILEMODE)) == -1) - err(FATAL, "%s: %s", wfile, strerror(errno)); - return (p); - default: - err(COMPILE, - "bad flag in substitute command: '%c'", *p); - break; - } - p++; - } -} - -/* - * Compile a translation set of strings into a lookup table. 
- */ -static char * -compile_tr(char *p, char **transtab) -{ - int i; - char *lt, *op, *np; - char *old = NULL, *new = NULL; - - if (*p == '\0' || *p == '\\') - err(COMPILE, -"transform pattern can not be delimited by newline or backslash"); - old = xmalloc(strlen(p) + 1); - p = compile_delimited(p, old, 1); - if (p == NULL) { - err(COMPILE, "unterminated transform source string"); - goto bad; - } - new = xmalloc(strlen(p) + 1); - p = compile_delimited(--p, new, 1); - if (p == NULL) { - err(COMPILE, "unterminated transform target string"); - goto bad; - } - EATSPACE(); - if (strlen(new) != strlen(old)) { - err(COMPILE, "transform strings are not the same length"); - goto bad; - } - /* We assume characters are 8 bits */ - lt = xmalloc(UCHAR_MAX + 1); - for (i = 0; i <= UCHAR_MAX; i++) - lt[i] = (char)i; - for (op = old, np = new; *op; op++, np++) - lt[(u_char)*op] = *np; - *transtab = lt; - free(old); - free(new); - return (p); -bad: - free(old); - free(new); - return (NULL); -} - -/* - * Compile the text following an a, c, or i command. - */ -static char * -compile_text(void) -{ - int asize, esc_nl, size; - char *lbuf, *text, *p, *op, *s; - size_t bufsize; - - lbuf = text = NULL; - asize = size = 0; - while ((p = cu_fgets(&lbuf, &bufsize))) { - size_t len = ROUNDLEN(strlen(p) + 1); - if (asize - size < len) { - do { - asize += len; - } while (asize - size < len); - text = xrealloc(text, asize); - } - op = s = text + size; - for (esc_nl = 0; *p != '\0'; p++) { - if (*p == '\\' && p[1] != '\0' && *++p == '\n') - esc_nl = 1; - *s++ = *p; - } - size += s - op; - if (!esc_nl) { - *s = '\0'; - break; - } - } - free(lbuf); - text = xrealloc(text, size + 1); - text[size] = '\0'; - return (text); -} - -/* - * Get an address and return a pointer to the first character after - * it. Fill the structure pointed to according to the address. 
- */ -static char * -compile_addr(char *p, struct s_addr *a) -{ - char *end; - - switch (*p) { - case '\\': /* Context address */ - ++p; - /* FALLTHROUGH */ - case '/': /* Context address */ - p = compile_re(p, &a->u.r); - if (p == NULL) - err(COMPILE, "unterminated regular expression"); - a->type = AT_RE; - return (p); - - case '$': /* Last line */ - a->type = AT_LAST; - return (p + 1); - /* Line number */ - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - a->type = AT_LINE; - a->u.l = strtoul(p, &end, 10); - return (end); - default: - err(COMPILE, "expected context address"); - return (NULL); - } -} - -/* - * duptoeol -- - * Return a copy of all the characters up to \n or \0. - */ -static char * -duptoeol(char *s, char *ctype, char **semi) -{ - size_t len; - int ws; - char *start; - - ws = 0; - if (semi) { - for (start = s; *s != '\0' && *s != '\n' && *s != ';'; ++s) - ws = isspace(*s); - } else { - for (start = s; *s != '\0' && *s != '\n'; ++s) - ws = isspace(*s); - *s = '\0'; - } - if (ws) - err(WARNING, "whitespace after %s", ctype); - len = s - start + 1; - if (semi) - *semi = s; - s = xmalloc(len); - strlcpy(s, start, len); - return (s); -} - -/* - * Convert goto label names to addresses, and count a and r commands, in - * the given subset of the script. Free the memory used by labels in b - * and t commands (but not by :). - * - * TODO: Remove } nodes - */ -static void -fixuplabel(struct s_command *cp, struct s_command *end) -{ - - for (; cp != end; cp = cp->next) - switch (cp->code) { - case 'a': - case 'r': - appendnum++; - break; - case 'b': - case 't': - /* Resolve branch target. */ - if (cp->t == NULL) { - cp->u.c = NULL; - break; - } - if ((cp->u.c = findlabel(cp->t)) == NULL) - err(COMPILE2, "undefined label '%s'", cp->t); - free(cp->t); - break; - case '{': - /* Do interior commands. */ - fixuplabel(cp->u.c, cp->next); - break; - } -} - -/* - * Associate the given command label for later lookup. 
- */ -static void -enterlabel(struct s_command *cp) -{ - struct labhash **lhp, *lh; - u_char *p; - u_int h, c; - - for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++) - h = (h << 5) + h + c; - lhp = &labels[h & LHMASK]; - for (lh = *lhp; lh != NULL; lh = lh->lh_next) - if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) - err(COMPILE2, "duplicate label '%s'", cp->t); - lh = xmalloc(sizeof *lh); - lh->lh_next = *lhp; - lh->lh_hash = h; - lh->lh_cmd = cp; - lh->lh_ref = 0; - *lhp = lh; -} - -/* - * Find the label contained in the command l in the command linked - * list cp. L is excluded from the search. Return NULL if not found. - */ -static struct s_command * -findlabel(char *name) -{ - struct labhash *lh; - u_char *p; - u_int h, c; - - for (h = 0, p = (u_char *)name; (c = *p) != 0; p++) - h = (h << 5) + h + c; - for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) { - if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { - lh->lh_ref = 1; - return (lh->lh_cmd); - } - } - return (NULL); -} - -/* - * Warn about any unused labels. As a side effect, release the label hash - * table space. - */ -static void -uselabel(void) -{ - struct labhash *lh, *next; - int i; - - for (i = 0; i < LHSZ; i++) { - for (lh = labels[i]; lh != NULL; lh = next) { - next = lh->lh_next; - if (!lh->lh_ref) - err(WARNING, "unused label '%s'", - lh->lh_cmd->t); - free(lh); - } - } -} diff --git a/sed/defs.h b/sed/defs.h @@ -1,148 +0,0 @@ -/* * $OpenBSD: defs.h,v 1.4 2008/10/16 16:34:32 millert Exp $*/ -/*- - * Copyright (c) 1992 Diomidis Spinellis. - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Diomidis Spinellis of Imperial College, University of London. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)defs.h 8.1 (Berkeley) 6/6/93 - */ - -/* - * Types of address specifications - */ -enum e_atype { - AT_RE, /* Line that match RE */ - AT_LINE, /* Specific line */ - AT_LAST, /* Last line */ -}; - -/* - * Format of an address - */ -struct s_addr { - enum e_atype type; /* Address type */ - union { - u_long l; /* Line number */ - regex_t *r; /* Regular expression */ - } u; -}; - -/* - * Substitution command - */ -struct s_subst { - int n; /* Occurrence to subst. 
*/ - int p; /* True if p flag */ - char *wfile; /* NULL if no wfile */ - int wfd; /* Cached file descriptor */ - regex_t *re; /* Regular expression */ - int maxbref; /* Largest backreference. */ - u_long linenum; /* Line number. */ - char *new; /* Replacement text */ -}; - - -/* - * An internally compiled command. - * Initialy, label references are stored in t, on a second pass they - * are updated to pointers. - */ -struct s_command { - struct s_command *next; /* Pointer to next command */ - struct s_addr *a1, *a2; /* Start and end address */ - char *t; /* Text for : a c i r w */ - union { - struct s_command *c; /* Command(s) for b t { */ - struct s_subst *s; /* Substitute command */ - u_char *y; /* Replace command array */ - int fd; /* File descriptor for w */ - } u; - char code; /* Command code */ - u_int nonsel:1; /* True if ! */ - u_int inrange:1; /* True if in range */ -}; - -/* - * Types of command arguments recognised by the parser - */ -enum e_args { - EMPTY, /* d D g G h H l n N p P q x = \0 */ - TEXT, /* a c i */ - NONSEL, /* ! */ - GROUP, /* { */ - ENDGROUP, /* } */ - COMMENT, /* # */ - BRANCH, /* b t */ - LABEL, /* : */ - RFILE, /* r */ - WFILE, /* w */ - SUBST, /* s */ - TR /* y */ -}; - -/* - * Structure containing things to append before a line is read - */ -struct s_appends { - enum {AP_STRING, AP_FILE} type; - char *s; - size_t len; -}; - -enum e_spflag { - APPEND, /* Append to the contents. */ - REPLACE, /* Replace the contents. */ -}; - -/* - * Structure for a space (process, hold, otherwise). - */ -typedef struct { - char *space; /* Current space pointer. */ - size_t len; /* Current length. */ - int deleted; /* If deleted. */ - char *back; /* Backing memory. */ - size_t blen; /* Backing memory length. 
*/ -} SPACE; - -/* - * Error severity codes: - */ -#define FATAL 0 /* Exit immediately with 1 */ -#define ERROR 1 /* Continue, but change exit value */ -#define WARNING 2 /* Just print the warning */ -#define COMPILE 3 /* Print error, count and finish script */ -#define COMPILE2 3 /* Print error, count and finish script */ - -/* - * Round up to the nearest multiple of _POSIX2_LINE_MAX - */ -#define ROUNDLEN(x) \ - (((x) + _POSIX2_LINE_MAX - 1) & ~(_POSIX2_LINE_MAX - 1)) diff --git a/sed/extern.h b/sed/extern.h @@ -1,56 +0,0 @@ -/* * $OpenBSD: extern.h,v 1.6 2009/08/07 03:30:56 djm Exp $*/ -/*- - * Copyright (c) 1992 Diomidis Spinellis. - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Diomidis Spinellis of Imperial College, University of London. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)extern.h 8.1 (Berkeley) 6/6/93 - */ - -extern struct s_command *prog; -extern struct s_appends *appends; -extern regmatch_t *match; -extern size_t maxnsub; -extern u_long linenum; -extern int appendnum; -extern int lastline; -extern int Eflag, aflag, eflag, nflag; -extern char *fname; - -void cfclose(struct s_command *, struct s_command *); -void compile(void); -void cspace(SPACE *, char *, size_t, enum e_spflag); -char *cu_fgets(char **, size_t *); -void err(int, const char *, ...); -int mf_fgets(SPACE *, enum e_spflag); -void process(void); -char *strregerror(int, regex_t *); -void *xmalloc(size_t); -void *xrealloc(void *, size_t); diff --git a/sed/main.c b/sed/main.c @@ -1,356 +0,0 @@ -/* $OpenBSD: main.c,v 1.17 2009/10/27 23:59:43 deraadt Exp $ */ - -/*- - * Copyright (c) 1992 Diomidis Spinellis. - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Diomidis Spinellis of Imperial College, University of London. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/types.h> - -#include <ctype.h> -#include <errno.h> -#include <fcntl.h> -#include <limits.h> -#include <regex.h> -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#include "defs.h" -#include "extern.h" - -/* - * Linked list of units (strings and files) to be compiled - */ -struct s_compunit { - struct s_compunit *next; - enum e_cut {CU_FILE, CU_STRING} type; - char *s; /* Pointer to string or fname */ -}; - -/* - * Linked list pointer to compilation units and pointer to current - * next pointer. 
- */ -static struct s_compunit *script, **cu_nextp = &script; - -/* - * Linked list of files to be processed - */ -struct s_flist { - char *fname; - struct s_flist *next; -}; - -/* - * Linked list pointer to files and pointer to current - * next pointer. - */ -static struct s_flist *files, **fl_nextp = &files; - -int Eflag, aflag, eflag, nflag; - -/* - * Current file and line number; line numbers restart across compilation - * units, but span across input files. - */ -char *fname; /* File name. */ -u_long linenum; -int lastline; /* TRUE on the last line of the last file */ - -static void add_compunit(enum e_cut, char *); -static void add_file(char *); - -int -main(int argc, char *argv[]) -{ - int c, fflag; - - fflag = 0; - while ((c = getopt(argc, argv, "Eae:f:nru")) != -1) - switch (c) { - case 'E': - case 'r': - Eflag = 1; - break; - case 'a': - aflag = 1; - break; - case 'e': - eflag = 1; - add_compunit(CU_STRING, optarg); - break; - case 'f': - fflag = 1; - add_compunit(CU_FILE, optarg); - break; - case 'n': - nflag = 1; - break; - case 'u': - setlinebuf(stdout); - break; - default: - case '?': - (void)fprintf(stderr, - "usage: sed [-aEnru] command [file ...]\n" - " sed [-aEnru] [-e command] [-f command_file] [file ...]\n"); - exit(1); - } - argc -= optind; - argv += optind; - - /* First usage case; script is the first arg */ - if (!eflag && !fflag && *argv) { - add_compunit(CU_STRING, *argv); - argv++; - } - - compile(); - - /* Continue with first and start second usage */ - if (*argv) - for (; *argv; argv++) - add_file(*argv); - else - add_file(NULL); - process(); - cfclose(prog, NULL); - if (fclose(stdout)) - err(FATAL, "stdout: %s", strerror(errno)); - exit (0); -} - -/* - * Like fgets, but go through the chain of compilation units chaining them - * together. Empty strings and files are ignored. 
- */ -char * -cu_fgets(char **outbuf, size_t *outsize) -{ - static enum {ST_EOF, ST_FILE, ST_STRING} state = ST_EOF; - static FILE *f; /* Current open file */ - static char *s; /* Current pointer inside string */ - static char string_ident[30]; - size_t len; - char *p; - - if (*outbuf == NULL) - *outsize = 0; - -again: - switch (state) { - case ST_EOF: - if (script == NULL) - return (NULL); - linenum = 0; - switch (script->type) { - case CU_FILE: - if ((f = fopen(script->s, "r")) == NULL) - err(FATAL, - "%s: %s", script->s, strerror(errno)); - fname = script->s; - state = ST_FILE; - goto again; - case CU_STRING: - if ((snprintf(string_ident, - sizeof(string_ident), "\"%s\"", script->s)) >= - sizeof(string_ident)) - strlcpy(string_ident + - sizeof(string_ident) - 6, " ...\"", 5); - fname = string_ident; - s = script->s; - state = ST_STRING; - goto again; - } - case ST_FILE: - if ((p = fgetln(f, &len)) != NULL) { - linenum++; - if (len >= *outsize) { - free(*outbuf); - *outsize = ROUNDLEN(len + 1); - *outbuf = xmalloc(*outsize); - } - memcpy(*outbuf, p, len); - (*outbuf)[len] = '\0'; - if (linenum == 1 && p[0] == '#' && p[1] == 'n') - nflag = 1; - return (*outbuf); - } - script = script->next; - (void)fclose(f); - state = ST_EOF; - goto again; - case ST_STRING: - if (linenum == 0 && s[0] == '#' && s[1] == 'n') - nflag = 1; - p = *outbuf; - len = *outsize; - for (;;) { - if (len <= 1) { - *outbuf = xrealloc(*outbuf, - *outsize + _POSIX2_LINE_MAX); - p = *outbuf + *outsize - len; - len += _POSIX2_LINE_MAX; - *outsize += _POSIX2_LINE_MAX; - } - switch (*s) { - case '\0': - state = ST_EOF; - if (s == script->s) { - script = script->next; - goto again; - } else { - script = script->next; - *p = '\0'; - linenum++; - return (*outbuf); - } - case '\n': - *p++ = '\n'; - *p = '\0'; - s++; - linenum++; - return (*outbuf); - default: - *p++ = *s++; - len--; - } - } - } - /* NOTREACHED */ -} - -/* - * Like fgets, but go through the list of files chaining them together. 
- * Set len to the length of the line. - */ -int -mf_fgets(SPACE *sp, enum e_spflag spflag) -{ - static FILE *f; /* Current open file */ - size_t len; - char *p; - int c; - - if (f == NULL) - /* Advance to first non-empty file */ - for (;;) { - if (files == NULL) { - lastline = 1; - return (0); - } - if (files->fname == NULL) { - f = stdin; - fname = "stdin"; - } else { - fname = files->fname; - if ((f = fopen(fname, "r")) == NULL) - err(FATAL, "%s: %s", - fname, strerror(errno)); - } - if ((c = getc(f)) != EOF) { - (void)ungetc(c, f); - break; - } - (void)fclose(f); - files = files->next; - } - - if (lastline) { - sp->len = 0; - return (0); - } - - /* - * Use fgetln so that we can handle essentially infinite input data. - * Can't use the pointer into the stdio buffer as the process space - * because the ungetc() can cause it to move. - */ - p = fgetln(f, &len); - if (ferror(f)) - err(FATAL, "%s: %s", fname, strerror(errno ? errno : EIO)); - cspace(sp, p, len, spflag); - - linenum++; - /* Advance to next non-empty file */ - while ((c = getc(f)) == EOF) { - (void)fclose(f); - files = files->next; - if (files == NULL) { - lastline = 1; - return (1); - } - if (files->fname == NULL) { - f = stdin; - fname = "stdin"; - } else { - fname = files->fname; - if ((f = fopen(fname, "r")) == NULL) - err(FATAL, "%s: %s", fname, strerror(errno)); - } - } - (void)ungetc(c, f); - return (1); -} - -/* - * Add a compilation unit to the linked list - */ -static void -add_compunit(enum e_cut type, char *s) -{ - struct s_compunit *cu; - - cu = xmalloc(sizeof(struct s_compunit)); - cu->type = type; - cu->s = s; - cu->next = NULL; - *cu_nextp = cu; - cu_nextp = &cu->next; -} - -/* - * Add a file to the linked list - */ -static void -add_file(char *s) -{ - struct s_flist *fp; - - fp = xmalloc(sizeof(struct s_flist)); - fp->next = NULL; - *fl_nextp = fp; - fp->fname = s; - fl_nextp = &fp->next; -} diff --git a/sed/misc.c b/sed/misc.c @@ -1,113 +0,0 @@ -/* $OpenBSD: misc.c,v 1.9 2009/10/27 
23:59:43 deraadt Exp $ */ - -/*- - * Copyright (c) 1992 Diomidis Spinellis. - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Diomidis Spinellis of Imperial College, University of London. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include <sys/types.h> - -#include <errno.h> -#include <regex.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdarg.h> - -#include "defs.h" -#include "extern.h" - -/* - * malloc with result test - */ -void * -xmalloc(size_t size) -{ - void *p; - - if ((p = malloc(size)) == NULL) - err(FATAL, "%s", strerror(errno)); - return (p); -} - -/* - * realloc with result test - */ -void * -xrealloc(void *p, size_t size) -{ - - if ((p = realloc(p, size)) == NULL) - err(FATAL, "%s", strerror(errno)); - return (p); -} - -/* - * Return a string for a regular expression error passed. This is a overkill, - * because of the silly semantics of regerror (we can never know the size of - * the buffer). - */ -char * -strregerror(int errcode, regex_t *preg) -{ - static char *oe; - size_t s; - - free(oe); - s = regerror(errcode, preg, "", 0); - oe = xmalloc(s); - (void)regerror(errcode, preg, oe, s); - return (oe); -} - -/* - * Error reporting function - */ -void -err(int severity, const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - (void)fprintf(stderr, "sed: "); - switch (severity) { - case WARNING: - case COMPILE: - (void)fprintf(stderr, "%lu: %s: ", linenum, fname); - } - (void)vfprintf(stderr, fmt, ap); - va_end(ap); - (void)fprintf(stderr, "\n"); - if (severity == WARNING) - return; - exit(1); - /* NOTREACHED */ -} diff --git a/sed/mkfile b/sed/mkfile @@ -1,7 +1,7 @@ BIN = sed -OBJ = compile.o main.o misc.o process.o +OBJ = sed0.o sed1.o version.o +LOCAL_CFLAGS = -DSU3 INSTALL_BIN = sed INSTALL_MAN1 = sed.1 -LOCAL_CFLAGS = -DDEFFILEMODE=0666 -DREG_STARTEND=REG_NEWLINE -<$mkbuild/mk.default -\ No newline at end of file +<$mkbuild/mk.default diff --git a/sed/process.c b/sed/process.c @@ -1,615 +0,0 @@ -/* $OpenBSD: process.c,v 1.18 2011/09/17 15:29:19 schwarze Exp $ */ - -/*- - * Copyright (c) 1992 Diomidis Spinellis. - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. 
- * - * This code is derived from software contributed to Berkeley by - * Diomidis Spinellis of Imperial College, University of London. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/ioctl.h> -#include <sys/uio.h> - -#include <ctype.h> -#include <errno.h> -#include <fcntl.h> -#include <limits.h> -#include <regex.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#include "defs.h" -#include "extern.h" - -static SPACE HS, PS, SS; -#define pd PS.deleted -#define ps PS.space -#define psl PS.len -#define hs HS.space -#define hsl HS.len - -static inline int applies(struct s_command *); -static void flush_appends(void); -static void lputs(char *); -static inline int regexec_e(regex_t *, const char *, int, int, size_t); -static void regsub(SPACE *, char *, char *); -static int substitute(struct s_command *); - -struct s_appends *appends; /* Array of pointers to strings to append. */ -static int appendx; /* Index into appends array. */ -int appendnum; /* Size of appends array. */ - -static int lastaddr; /* Set by applies if last address of a range. */ -static int sdone; /* If any substitutes since last line input. */ - /* Iov structure for 'w' commands. 
*/ -static regex_t *defpreg; -size_t maxnsub; -regmatch_t *match; - -#define OUT(s) do { fwrite(s, sizeof(u_char), psl, stdout); } while (0) - -void -process(void) -{ - struct s_command *cp; - SPACE tspace; - size_t len, oldpsl; - char *p; - - for (linenum = 0; mf_fgets(&PS, REPLACE);) { - pd = 0; -top: - cp = prog; -redirect: - while (cp != NULL) { - if (!applies(cp)) { - cp = cp->next; - continue; - } - switch (cp->code) { - case '{': - cp = cp->u.c; - goto redirect; - case 'a': - if (appendx >= appendnum) { - appends = xrealloc(appends, - sizeof(struct s_appends) * - (appendnum * 2)); - appendnum *= 2; - } - appends[appendx].type = AP_STRING; - appends[appendx].s = cp->t; - appends[appendx].len = strlen(cp->t); - appendx++; - break; - case 'b': - cp = cp->u.c; - goto redirect; - case 'c': - pd = 1; - psl = 0; - if (cp->a2 == NULL || lastaddr) - (void)printf("%s", cp->t); - break; - case 'd': - pd = 1; - goto new; - case 'D': - if (pd) - goto new; - if (psl == 0 || - (p = memchr(ps, '\n', psl - 1)) == NULL) { - pd = 1; - goto new; - } else { - psl -= (p + 1) - ps; - memmove(ps, p + 1, psl); - goto top; - } - case 'g': - cspace(&PS, hs, hsl, REPLACE); - break; - case 'G': - if (hs == NULL) - cspace(&HS, "\n", 1, REPLACE); - cspace(&PS, hs, hsl, 0); - break; - case 'h': - cspace(&HS, ps, psl, REPLACE); - break; - case 'H': - cspace(&HS, ps, psl, 0); - break; - case 'i': - (void)printf("%s", cp->t); - break; - case 'l': - lputs(ps); - break; - case 'n': - if (!nflag && !pd) - OUT(ps); - flush_appends(); - if (!mf_fgets(&PS, REPLACE)) - exit(0); - pd = 0; - break; - case 'N': - flush_appends(); - if (!mf_fgets(&PS, 0)) { - if (!nflag && !pd) - OUT(ps); - exit(0); - } - break; - case 'p': - if (pd) - break; - OUT(ps); - break; - case 'P': - if (pd) - break; - if (psl != 0 && - (p = memchr(ps, '\n', psl - 1)) != NULL) { - oldpsl = psl; - psl = (p + 1) - ps; - } - OUT(ps); - if (p != NULL) - psl = oldpsl; - break; - case 'q': - if (!nflag && !pd) - OUT(ps); - 
flush_appends(); - exit(0); - case 'r': - if (appendx >= appendnum) - appends = xrealloc(appends, - sizeof(struct s_appends) * - (appendnum *= 2)); - appends[appendx].type = AP_FILE; - appends[appendx].s = cp->t; - appends[appendx].len = strlen(cp->t); - appendx++; - break; - case 's': - sdone |= substitute(cp); - break; - case 't': - if (sdone) { - sdone = 0; - cp = cp->u.c; - goto redirect; - } - break; - case 'w': - if (pd) - break; - if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, - O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, - DEFFILEMODE)) == -1) - err(FATAL, "%s: %s", - cp->t, strerror(errno)); - if (write(cp->u.fd, ps, psl) != psl) - err(FATAL, "%s: %s", - cp->t, strerror(errno)); - break; - case 'x': - if (hs == NULL) - cspace(&HS, "\n", 1, REPLACE); - tspace = PS; - PS = HS; - HS = tspace; - break; - case 'y': - if (pd || psl == 0) - break; - for (p = ps, len = psl; --len; ++p) - *p = cp->u.y[(unsigned char)*p]; - break; - case ':': - case '}': - break; - case '=': - (void)printf("%lu\n", linenum); - } - cp = cp->next; - } /* for all cp */ - -new: if (!nflag && !pd) - OUT(ps); - flush_appends(); - } /* for all lines */ -} - -/* - * TRUE if the address passed matches the current program state - * (lastline, linenumber, ps). - */ -#define MATCH(a) \ - (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \ - (a)->type == AT_LINE ? linenum == (a)->u.l : lastline - -/* - * Return TRUE if the command applies to the current line. Sets the inrange - * flag to process ranges. Interprets the non-select (``!'') flag. - */ -static inline int -applies(struct s_command *cp) -{ - int r; - - lastaddr = 0; - if (cp->a1 == NULL && cp->a2 == NULL) - r = 1; - else if (cp->a2) - if (cp->inrange) { - if (MATCH(cp->a2)) { - cp->inrange = 0; - lastaddr = 1; - } - r = 1; - } else if (MATCH(cp->a1)) { - /* - * If the second address is a number less than or - * equal to the line number first selected, only - * one line shall be selected. 
- * -- POSIX 1003.2 - */ - if (cp->a2->type == AT_LINE && - linenum >= cp->a2->u.l) - lastaddr = 1; - else - cp->inrange = 1; - r = 1; - } else - r = 0; - else - r = MATCH(cp->a1); - return (cp->nonsel ? !r : r); -} - -/* - * substitute -- - * Do substitutions in the pattern space. Currently, we build a - * copy of the new pattern space in the substitute space structure - * and then swap them. - */ -static int -substitute(struct s_command *cp) -{ - SPACE tspace; - regex_t *re; - regoff_t slen; - int n, lastempty; - char *s; - - s = ps; - re = cp->u.s->re; - if (re == NULL) { - if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) { - linenum = cp->u.s->linenum; - err(COMPILE, "\\%d not defined in the RE", - cp->u.s->maxbref); - } - } - if (!regexec_e(re, s, 0, 0, psl)) - return (0); - - SS.len = 0; /* Clean substitute space. */ - slen = psl; - n = cp->u.s->n; - lastempty = 1; - - do { - /* Copy the leading retained string. */ - if (n <= 1 && match[0].rm_so) - cspace(&SS, s, match[0].rm_so, APPEND); - - /* Skip zero-length matches right after other matches. */ - if (lastempty || match[0].rm_so || - match[0].rm_so != match[0].rm_eo) { - if (n <= 1) { - /* Want this match: append replacement. */ - regsub(&SS, s, cp->u.s->new); - if (n == 1) - n = -1; - } else { - /* Want a later match: append original. */ - if (match[0].rm_eo) - cspace(&SS, s, match[0].rm_eo, APPEND); - n--; - } - } - - /* Move past this match. */ - s += match[0].rm_eo; - slen -= match[0].rm_eo; - - /* - * After a zero-length match, advance one byte, - * and at the end of the line, terminate. - */ - if (match[0].rm_so == match[0].rm_eo) { - if (*s == '\0' || *s == '\n') - slen = -1; - else - slen--; - if (*s != '\0') - cspace(&SS, s++, 1, APPEND); - lastempty = 1; - } else - lastempty = 0; - - } while (n >= 0 && slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen)); - - /* Did not find the requested number of matches. */ - if (n > 1) - return (0); - - /* Copy the trailing retained string. 
*/ - if (slen > 0) - cspace(&SS, s, slen, APPEND); - - /* - * Swap the substitute space and the pattern space, and make sure - * that any leftover pointers into stdio memory get lost. - */ - tspace = PS; - PS = SS; - SS = tspace; - SS.space = SS.back; - - /* Handle the 'p' flag. */ - if (cp->u.s->p) - OUT(ps); - - /* Handle the 'w' flag. */ - if (cp->u.s->wfile && !pd) { - if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, - O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) - err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno)); - if (write(cp->u.s->wfd, ps, psl) != psl) - err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno)); - } - return (1); -} - -/* - * Flush append requests. Always called before reading a line, - * therefore it also resets the substitution done (sdone) flag. - */ -static void -flush_appends(void) -{ - FILE *f; - int count, i; - char buf[8 * 1024]; - - for (i = 0; i < appendx; i++) - switch (appends[i].type) { - case AP_STRING: - fwrite(appends[i].s, sizeof(char), appends[i].len, - stdout); - break; - case AP_FILE: - /* - * Read files probably shouldn't be cached. Since - * it's not an error to read a non-existent file, - * it's possible that another program is interacting - * with the sed script through the file system. It - * would be truly bizarre, but possible. It's probably - * not that big a performance win, anyhow. - */ - if ((f = fopen(appends[i].s, "r")) == NULL) - break; - while ((count = fread(buf, sizeof(char), sizeof(buf), f))) - (void)fwrite(buf, sizeof(char), count, stdout); - (void)fclose(f); - break; - } - if (ferror(stdout)) - err(FATAL, "stdout: %s", strerror(errno ? 
errno : EIO)); - appendx = sdone = 0; -} - -static void -lputs(char *s) -{ - int count; - char *escapes, *p; - struct winsize win; - static int termwidth = -1; - - if (termwidth == -1) { - if ((p = getenv("COLUMNS"))) - termwidth = atoi(p); - else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && - win.ws_col > 0) - termwidth = win.ws_col; - else - termwidth = 60; - } - - for (count = 0; *s; ++s) { - if (count >= termwidth) { - (void)printf("\\\n"); - count = 0; - } - if (isascii(*s) && isprint(*s) && *s != '\\') { - (void)putchar(*s); - count++; - } else if (*s != '\n') { - escapes = "\\\a\b\f\r\t\v"; - (void)putchar('\\'); - if ((p = strchr(escapes, *s))) { - (void)putchar("\\abfrtv"[p - escapes]); - count += 2; - } else { - (void)printf("%03o", *(u_char *)s); - count += 4; - } - } - } - (void)putchar('$'); - (void)putchar('\n'); - if (ferror(stdout)) - err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); -} - -static inline int -regexec_e(regex_t *preg, const char *string, int eflags, - int nomatch, size_t slen) -{ - int eval; - - if (preg == NULL) { - if (defpreg == NULL) - err(FATAL, "first RE may not be empty"); - } else - defpreg = preg; - - /* Set anchors, discounting trailing newline (if any). */ - if (slen > 0 && string[slen - 1] == '\n') - slen--; - match[0].rm_so = 0; - match[0].rm_eo = slen; - eval = regexec(defpreg, string, - nomatch ? 
0 : maxnsub + 1, match, eflags | REG_STARTEND); - switch (eval) { - case 0: - return (1); - case REG_NOMATCH: - return (0); - } - err(FATAL, "RE error: %s", strregerror(eval, defpreg)); - /* NOTREACHED */ -} - -/* - * regsub - perform substitutions after a regexp match - * Based on a routine by Henry Spencer - */ -static void -regsub(SPACE *sp, char *string, char *src) -{ - int len, no; - char c, *dst; - -#define NEEDSP(reqlen) \ - if (sp->len + (reqlen) + 1 >= sp->blen) { \ - size_t newlen = sp->blen + (reqlen) + 1024; \ - sp->space = sp->back = xrealloc(sp->back, newlen); \ - sp->blen = newlen; \ - dst = sp->space + sp->len; \ - } - - dst = sp->space + sp->len; - while ((c = *src++) != '\0') { - if (c == '&') - no = 0; - else if (c == '\\' && isdigit(*src)) - no = *src++ - '0'; - else - no = -1; - if (no < 0) { /* Ordinary character. */ - if (c == '\\' && (*src == '\\' || *src == '&')) - c = *src++; - NEEDSP(1); - *dst++ = c; - ++sp->len; - } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { - len = match[no].rm_eo - match[no].rm_so; - NEEDSP(len); - memmove(dst, string + match[no].rm_so, len); - dst += len; - sp->len += len; - } - } - NEEDSP(1); - *dst = '\0'; -} - -/* - * aspace -- - * Append the source space to the destination space, allocating new - * space as necessary. - */ -void -cspace(SPACE *sp, char *p, size_t len, enum e_spflag spflag) -{ - size_t tlen; - - /* Make sure SPACE has enough memory and ramp up quickly. 
*/ - tlen = sp->len + len + 1; - if (tlen > sp->blen) { - size_t newlen = tlen + 1024; - sp->space = sp->back = xrealloc(sp->back, newlen); - sp->blen = newlen; - } - - if (spflag == REPLACE) - sp->len = 0; - - memmove(sp->space + sp->len, p, len); - - sp->space[sp->len += len] = '\0'; -} - -/* - * Close all cached opened files and report any errors - */ -void -cfclose(struct s_command *cp, struct s_command *end) -{ - - for (; cp != end; cp = cp->next) - switch (cp->code) { - case 's': - if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) - err(FATAL, - "%s: %s", cp->u.s->wfile, strerror(errno)); - cp->u.s->wfd = -1; - break; - case 'w': - if (cp->u.fd != -1 && close(cp->u.fd)) - err(FATAL, "%s: %s", cp->t, strerror(errno)); - cp->u.fd = -1; - break; - case '{': - cfclose(cp->u.c, cp->next); - break; - } -} diff --git a/sed/sed.1 b/sed/sed.1 @@ -1,552 +1,369 @@ -.\" $OpenBSD: sed.1,v 1.36 2010/09/03 11:09:29 jmc Exp $ .\" -.\" Copyright (c) 1992, 1993 -.\" The Regents of the University of California. All rights reserved. -.\" -.\" This code is derived from software contributed to Berkeley by -.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" Sccsid @(#)sed.1 1.19 (gritter) 1/24/05 +.\" Derived from sed(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. 
+.\" Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. -.\" 3. Neither the name of the University nor the names of its contributors -.\" may be used to endorse or promote products derived from this software -.\" without specific prior written permission. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" from: @(#)sed.1 8.2 (Berkeley) 12/30/93 +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. 
.\" -.Dd $Mdocdate: September 3 2010 $ -.Dt SED 1 -.Os -.Sh NAME -.Nm sed -.Nd stream editor -.Sh SYNOPSIS -.Nm sed -.Op Fl aEnru -.Ar command -.Op Ar -.Nm sed -.Op Fl aEnru -.Op Fl e Ar command -.Op Fl f Ar command_file -.Op Ar -.Sh DESCRIPTION +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH SED 1 "1/24/05" "Heirloom Toolchest" "User Commands" +.SH NAME +sed \- stream editor +.SH SYNOPSIS +\fBsed\fR [\fB\-n\fR] [\fB\-e\fI\ script\fR] [\fB\-f\fI\ sfile\fR] +[\fIfile\fR\ .\ .\ .] +.SH DESCRIPTION +.I Sed +copies the named +.I files +(standard input default) to the standard output, +edited according to a script of commands. The -.Nm -utility reads the specified files, or the standard input if no files -are specified, modifying the input as specified by a list of commands. -The input is then written to the standard output. -.Pp -A single command may be specified as the first argument to -.Nm sed . -Multiple commands may be specified -separated by newlines or semicolons, -or by using the -.Fl e -or -.Fl f -options. -All commands are applied to the input in the order they are specified -regardless of their origin. 
-.Pp -The options are as follows: -.Bl -tag -width Ds -.It Fl a -The files listed as parameters for the -.Ql w -functions are created (or truncated) before any processing begins, -by default. -The -.Fl a -option causes -.Nm -to delay opening each file until a command containing the related -.Ql w -function is applied to a line of input. -.It Fl E -Interpret regular expressions using POSIX extended regular expression syntax. -The default behaviour is to use POSIX basic regular expression syntax. -.It Fl e Ar command -Append the editing commands specified by the -.Ar command -argument -to the list of commands. -.It Fl f Ar command_file -Append the editing commands found in the file -.Ar command_file -to the list of commands. -The editing commands should each be listed on a separate line. -.It Fl r -An alias for -.Fl E , -for compatibility with GNU sed. -.It Fl n -By default, each line of input is echoed to the standard output after -all of the commands have been applied to it. +.B \-f +option causes the script to be taken from file +.IR sfile ; +these options accumulate. +If there is just one +.B \-e +option and no +.BR \-f 's, +the flag +.B \-e +may be omitted. The -.Fl n -option suppresses this behavior. -.It Fl u -Force output to be line buffered, -printing each line as it becomes available. -By default, output is line buffered when standard output is a terminal -and block buffered otherwise. -See -.Xr setbuf 3 -for a more detailed explanation. -.El -.Pp -The form of a -.Nm -command is as follows: -.Pp -.Dl [address[,address]]function[arguments] -.Pp -Whitespace may be inserted before the first address and the function -portions of the command. 
-.Pp -Normally, -.Nm -cyclically copies a line of input, not including its terminating newline -character, into a -.Em pattern space , -(unless there is something left after a -.Sq D -function), -applies all of the commands with addresses that select that pattern space, -copies the pattern space to the standard output, appending a newline, and -deletes the pattern space. -.Pp -Some of the functions use a -.Em hold space -to save all or part of the pattern space for subsequent retrieval. -.Sh SED ADDRESSES -An address is not required, but if specified must be a number (that counts -input lines -cumulatively across input files), a dollar character -.Pq Ql $ -that addresses the last line of input, or a context address -(which consists of a regular expression preceded and followed by a -delimiter). -.Pp +.B \-n +option suppresses the default output. +.PP +A script consists of editing commands, one per line, +of the following form: +.IP +[address [, address] ] function [arguments] +.PP +In normal operation +.I sed +cyclically copies a line of input into a +.I pattern space +(unless there is something left after +a `D' command), +applies in sequence +all commands whose +.I addresses +select that pattern space, +and at the end of the script copies the pattern space +to the standard output (except under +.BR \-n ) +and deletes the pattern space. +.PP +An +.I address +is either a decimal number that counts +input lines cumulatively across files, a `$' that +addresses the last line of input, or a context address, +`/regular expression/', in the style of +.IR ed (1) +modified thus: +.IP +The escape sequence `\en' matches a +newline embedded in the pattern space. +.IP A command line with no addresses selects every pattern space. -.Pp -A command line with one address selects all of the pattern spaces -that match the address. 
-.Pp -A command line with two addresses selects the inclusive range from -the first pattern space that matches the first address through the next -pattern space that matches the second. -(If the second address is a number less than or equal to the line number -first selected, only that line is selected.) -Starting at the first line following the selected range, -.Nm -starts looking again for the first address. -.Pp -Editing commands can be applied to non-selected pattern spaces by use -of the exclamation character -.Pq Ql \&! -function. -.Sh SED REGULAR EXPRESSIONS -By default, -.Nm -regular expressions are basic regular expressions -.Pq BREs . -Extended regular expressions are supported using the -.Fl E +.IP +A command line with +one address selects each pattern space that matches the address. +.IP +A command line with +two addresses selects the inclusive range from the first +pattern space that matches the first address through +the next pattern space that matches +the second. +(If the second address is a number less than or equal +to the line number first selected, only one +line is selected.) +Thereafter the process is repeated, looking again for the +first address. +.PP +Editing commands can be applied only to non-selected pattern +spaces by use of the negation function `!' (below). +.PP +Regular expressions are simple regular expressions with +.BR /usr/5bin/sed , +and basic regular expressions with +.BR /usr/5bin/posix/sed , +.BR /usr/5bin/posix2001/sed , and -.Fl r -options. -See -.Xr re_format 7 -for more information on regular expressions. -In addition, -.Nm -has the following two additions to BREs: -.Pp -.Bl -enum -compact -.It -In a context address, any character other than a backslash -.Pq Ql \e -or newline character may be used to delimit the regular expression. -The opening delimiter should be preceded by a backslash -unless it is a slash. -Putting a backslash character before the delimiting character -causes the character to be treated literally. 
-For example, in the context address \exabc\exdefx, the RE delimiter -is an -.Sq x -and the second -.Sq x -stands for itself, so that the regular expression is -.Dq abcxdef . -.Pp -.It -The escape sequence \en matches a newline character embedded in the -pattern space. -You can't, however, use a literal newline character in an address or -in the substitute command. -.El -.Pp -One special feature of -.Nm -regular expressions is that they can default to the last regular -expression used. -If a regular expression is empty, i.e., just the delimiter characters -are specified, the last regular expression encountered is used instead. -The last regular expression is defined as the last regular expression -used as part of an address or substitute command, and at run-time, not -compile-time. -For example, the command -.Dq /abc/s//XXX/ -will substitute -.Dq XXX -for the pattern -.Dq abc . -.Sh SED FUNCTIONS -In the following list of commands, the maximum number of permissible -addresses for each command is indicated by [0addr], [1addr], or [2addr], -representing zero, one, or two addresses. -.Pp -The argument -.Em text -consists of one or more lines. -To embed a newline in the text, precede it with a backslash. -Other backslashes in text are deleted and the following character -taken literally. -.Pp -The -.Sq r -and -.Sq w -functions take an optional file parameter, which should be separated -from the function letter by whitespace. -Each file given as an argument to -.Nm -is created (or its contents truncated) before any input processing begins. -.Pp -The -.Sq b , -.Sq r , -.Sq s , -.Sq t , -.Sq w , -.Sq y , -.Ql \&! , +.BR /usr/5bin/s42/sed . +.PP +In the following list of functions the +maximum number of permissible addresses +for each function is indicated in parentheses. +.PP +An argument denoted +.I text +consists of one or more lines, +all but the last of which end with `\e' to hide the +newline. 
+Backslashes in text are treated like backslashes +in the replacement string of an `s' command. +.PP +An argument denoted +.I rfile +or +.I wfile +must terminate the command +line. +.B /usr/5bin/sed and -.Ql \&: -functions all accept additional arguments. -The following synopses indicate which arguments have to be separated from -the function letters by whitespace characters. -.Pp -Two of the functions take a function-list. -This is a list of -.Nm -functions separated by newlines, as follows: -.Bd -literal -offset indent -{ function - function - ... - function -} -.Ed -.Pp -The -.Ql { -can be preceded or followed by whitespace. -The function can be preceded by whitespace as well. -The terminating -.Ql } -must be preceded by a newline or optional whitespace. -.Pp -.Bl -tag -width "XXXXXXXX" -compact -.It [2addr] Em function-list -Execute -.Em function-list -only when the pattern space is selected. -.Pp -.It [1addr] Ns Em a Ns \e -.It Em text -.Pp -Write -.Em text -to standard output immediately before each attempt to read a line of input, -whether by executing the -.Sq N -function or by beginning a new cycle. -.Pp -.It [2addr] Ns Em b Ns [label] -Branch to the -.Sq \&: -function with the specified label. -If the label is not specified, branch to the end of the script. -.Pp -.It [2addr] Ns Em c Ns \e -.It Em text -.Pp -Delete the pattern space. -With 0 or 1 address or at the end of a 2-address range, -.Em text -is written to the standard output. -.Pp -.It [2addr] Ns Em d -Delete the pattern space and start the next cycle. -.Pp -.It [2addr] Ns Em D -Delete the initial segment of the pattern space through the first -newline character and start the next cycle. -.Pp -.It [2addr] Ns Em g -Replace the contents of the pattern space with the contents of the -hold space. -.Pp -.It [2addr] Ns Em G -Append a newline character followed by the contents of the hold space -to the pattern space. 
-.Pp -.It [2addr] Ns Em h -Replace the contents of the hold space with the contents of the -pattern space. -.Pp -.It [2addr] Ns Em H -Append a newline character followed by the contents of the pattern space -to the hold space. -.Pp -.It [1addr] Ns Em i Ns \e -.It Em text -.Pp -Write -.Em text -to the standard output. -.Pp -.It [2addr] Ns Em l -(The letter ell.) -Write the pattern space to the standard output in a visually unambiguous -form. -This form is as follows: -.Pp -.Bl -tag -width "carriage-returnXX" -offset indent -compact -.It backslash -\e\e -.It alert -\ea -.It backspace -\eb -.It form-feed -\ef -.It carriage-return -\er -.It tab -\et -.It vertical tab -\ev -.El -.Pp -Non-printable characters are written as three-digit octal numbers (with a -preceding backslash) for each byte in the character (most significant byte -first). -Long lines are folded, with the point of folding indicated by displaying -a backslash followed by a newline. -The end of each line is marked with a -.Ql $ . -.Pp -.It [2addr] Ns Em n -Write the pattern space to the standard output if the default output has -not been suppressed, and replace the pattern space with the next line of -input. -.Pp -.It [2addr] Ns Em N -Append the next line of input to the pattern space, using an embedded -newline character to separate the appended material from the original -contents. -Note that the current line number changes. -.Pp -.It [2addr] Ns Em p -Write the pattern space to standard output. -.Pp -.It [2addr] Ns Em P -Write the pattern space, up to the first newline character to the -standard output. -.Pp -.It [1addr] Ns Em q -Branch to the end of the script and quit without starting a new cycle. -.Pp -.It [1addr] Ns Em r file -Copy the contents of -.Em file -to the standard output immediately before the next attempt to read a -line of input. +.B /usr/5bin/s42/sed +require that it is +preceded by exactly one blank. +Each +.I wfile +is created before processing begins. 
+.TP 10 +(1)\|\fBa\e\fR +.br +.ns +.TP 10 +.I text +Append. +Place +.I text +on the output before +reading the next input line. +.TP 10 +(2)\|\fBb \fIlabel\fR +Branch to the `:' command bearing the +.IR label . If -.Em file -cannot be read for any reason, it is silently ignored and no error -condition is set. -.Pp -.It [2addr] Ns Em s Ns /re/replacement/flags -Substitute the replacement string for the first instance of the regular -expression in the pattern space. -Any character other than backslash or newline can be used instead of -a slash to delimit the RE and the replacement. -Within the RE and the replacement, the RE delimiter itself can be used as -a literal character if it is preceded by a backslash. -.Pp -An ampersand -.Pq Ql & -appearing in the replacement is replaced by the string matching the RE. -The special meaning of -.Ql & -in this context can be suppressed by preceding it by a backslash. -The string -.Ql \e# , -where -.Ql # -is a digit, is replaced by the text matched -by the corresponding backreference expression (see -.Xr re_format 7 ) . -.Pp -A line can be split by substituting a newline character into it. -To specify a newline character in the replacement string, precede it with -a backslash. -.Pp -The value of -.Em flags -in the substitute function is zero or more of the following: -.Bl -tag -width "XXXXXX" -offset indent -.It 0 ... 9 -Make the substitution only for the N'th occurrence of the regular -expression in the pattern space. -.It g -Make the substitution for all non-overlapping matches of the -regular expression, not just the first one. -.It p -Write the pattern space to standard output if a replacement was made. -If the replacement string is identical to that which it replaces, it -is still considered to have been a replacement. -.It w Em file +.I label +is empty, branch to the end of the script. +.TP 10 +(2)\|\fBc\e\fR +.br +.ns +.TP 10 +.I text +Change. +Delete the pattern space. 
+With 0 or 1 address or at the end of a 2-address range, place +.I text +on the output. +Start the next cycle. +.TP 10 +(2)\|\fBd\fR +Delete the pattern space. +Start the next cycle. +.TP 10 +(2)\|\fBD\fR +Delete the initial segment of the +pattern space through the first newline. +Start the next cycle. +.TP 10 +(2)\|\fBg\fR +Replace the contents of the pattern space +by the contents of the hold space. +.TP 10 +(2)\|\fBG\fR +Append the contents of the hold space to the pattern space. +.TP 10 +(2)\|\fBh\fR +Replace the contents of the hold space by the contents of the pattern space. +.TP 10 +(2)\|\fBH\fR +Append the contents of the pattern space to the hold space. +.TP 10 +(1)\|\fBi\e\fR +.br +.ns +.TP 10 +.I text +Insert. +Place +.I text +on the standard output. +.TP 10 +(2)\|\fBl\fR +List the pattern space on the standard output in an +unambiguous form. +Non-printing ASCII characters are spelled +in two- or three-digit ASCII, +and long lines are folded. +.TP 10 +(2)\|\fBn\fR +Copy the pattern space to the standard output. +Replace the pattern space with the next line of input. +.TP 10 +(2)\|\fBN\fR +Append the next line of input to the pattern space +with an embedded newline. +(The current line number changes.) +.TP 10 +(2)\|\fBp\fR +Print. +Copy the pattern space to the standard output. +.TP 10 +(2)\|\fBP\fR +Copy the initial segment of the pattern space through +the first newline to the standard output. +.TP 10 +(1)\|\fBq\fR +Quit. +Branch to the end of the script. +Do not start a new cycle. +.TP 10 +(2)\|\fBr \fIrfile\fR +Read the contents of +.IR rfile . +Place them on the output before reading +the next input line. +.TP 10 +(2)\|\fBs/\fIregular\ expression\fB/\fIreplacement\fB/\fIflags\fR +Substitute the +.I replacement +string for instances of the +.I regular expression +in the pattern space. +Any character may be used instead of `/'. +For a fuller description see +.IR ed (1). +.I Flags +is zero or more of +.RS +.TP +.B g +Global. 
+Substitute for all nonoverlapping instances of the +.I regular expression +rather than just the +first one. +.TP +.I n +\fIn\fR=1\^\(en\^512. +Substitute the \fIn\fRth occurrence of the +.I regular expression +only. +.TP +.B p +Print the pattern space if a replacement was made. +.TP +\fBw \fIwfile\fB +Write. +Append the pattern space to +.I wfile +if a replacement +was made. +.RE +.TP 10 +(2)\|\fBt \fIlabel\fR +Test. +Branch to the `:' command bearing the +.I label +if any +substitutions have been made since the most recent +reading of an input line or execution of a `t'. +If +.I label +is empty, branch to the end of the script. +.TP 10 +(2)\|\fBw \fIwfile\fR +Write. Append the pattern space to -.Em file -if a replacement was made. -If the replacement string is identical to that which it replaces, it -is still considered to have been a replacement. -.El -.Pp -.It [2addr] Ns Em t Ns [label] -Branch to the -.Ql \&: -function bearing the label if any substitutions have been made since the -most recent reading of an input line or execution of a -.Sq t -function. -If no label is specified, branch to the end of the script. -.Pp -.It [2addr] Ns Em w file -Append the pattern space to the -.Em file . -.Pp -.It [2addr] Ns Em x -Swap the contents of the pattern and hold spaces. -.Pp -.It [2addr] Ns Em y Ns /string1/string2/ +.IR wfile . +.TP 10 +(2)\|\fBx\fR +Exchange the contents of the pattern and hold spaces. +.TP 10 +(2)\|\fBy/\fIstring1\fB/\fIstring2\fB/\fR +Transform. Replace all occurrences of characters in -.Em string1 -in the pattern space with the corresponding characters from -.Em string2 . -Any character other than a backslash or newline can be used instead of -a slash to delimit the strings. -Within -.Em string1 +.I string1 +with the corresponding character in +.I string2. 
+The lengths of +.I +string1 and -.Em string2 , -a backslash followed by any character other than a newline is that literal -character, and a backslash followed by an -.Sq n -is replaced by a newline character. -.Pp -.Sm off -.It Xo [2addr] Em !function No ,\ \&[2addr] -.Em !function-list -.Xc -.Sm on -Apply the function or function-list only to the lines that are -.Em not +.I string2 +must be equal. +.TP 10 +(2)\fB! \fIfunction\fB +Don't. +Apply the +.I function +(or group, if +.I function +is `{') only to lines +.I not selected by the address(es). -.Pp -.It [0addr] Ns Em \&: Ns label -This function does nothing; it bears a label to which the -.Sq b -and -.Sq t -commands may branch. -.Pp -.It [1addr] Ns Em = -Write the line number to the standard output followed by a newline character. -.Pp -.It [0addr] -Empty lines are ignored. -.Pp -.It [0addr] Ns Em # -The -.Ql # -and the remainder of the line are ignored (treated as a comment), with -the single exception that if the first two characters in the file are -.Ql #n , -the default output is suppressed. -This is the same as specifying the -.Fl n -option on the command line. -.El -.Sh EXIT STATUS -.Ex -std sed -.Sh SEE ALSO -.Xr awk 1 , -.Xr ed 1 , -.Xr grep 1 , -.Xr regex 3 , -.Xr setbuf 3 , -.Xr re_format 7 -.Sh STANDARDS -The -.Nm -utility is compliant with the -.St -p1003.1-2008 -specification. -.Pp -The flags -.Op Fl aEru -are extensions to that specification. -.Pp -The use of newlines to separate multiple commands on the command line -is non-portable; -the use of newlines to separate multiple commands within a command file -.Pq Fl f Ar command_file -is portable. -.Sh HISTORY -A -.Nm -command appeared in -.At v7 . -.Sh CAVEATS -The use of semicolons to separate multiple commands -is not permitted for the following commands: -.Cm a , b , c , -.Cm i , r , t , -.Cm w , \&: , +.TP 10 +(0)\|\fB: \fIlabel\fR +This command does nothing; it bears a +.I label +for `b' and `t' commands to branch to. 
+.TP 10 +(1)\|\fB=\fR +Place the current line number on the standard output as a line. +.TP 10 +(2)\|\fB{\fR +Execute the following commands through a matching `}' +only when the pattern space is selected. +.TP 10 +(0)\| +An empty command is ignored. +.TP 10 +(0)\|\fB#\fP +Ignore the entire line, +except when the first two characters in the script are \fB#n\fP, +which has the same effect as the \f2\-n\fP command line option. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_COLLATE +Affects the collation order for range expressions, +equivalence classes, and collation symbols +in basic regular expressions. +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +in both simple and basic regular expressions, +for the +.B l and -.Cm # . +.B y +commands, +and the availability and composition of character classes +in basic regular expressions. +.SH SEE ALSO +ed(1), +grep(1), +awk(1), +locale(7) diff --git a/sed/sed.h b/sed/sed.h @@ -0,0 +1,191 @@ +/* + * sed -- stream editor + * + * Copyright 1975 Bell Telephone Laboratories, Incorporated + * + * Owner: lem + */ + +/* from Unix 7th Edition and Unix 32V sed */ +/* Sccsid @(#)sed.h 1.32 (gritter) 2/6/05 */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> + +#if defined (SUS) || defined (SU3) || defined (S42) +#include <regex.h> +#endif /* SUS || SU3 || S42 */ + +#ifdef __GLIBC__ +#ifdef _IO_getc_unlocked +#undef getc +#define getc(f) _IO_getc_unlocked(f) +#endif /* _IO_getc_unlocked */ +#ifdef _IO_putc_unlocked +#undef putc +#define putc(c, f) _IO_putc_unlocked(c, f) +#endif /* _IO_putc_unlocked */ +#endif /* __GLIBC__ */ + +#define CEND 16 +#define CLNUM 14 + +#if defined (SUS) || defined (SU3) || defined (S42) +struct re_emu { + char *r_dummy; + regex_t r_preg; +}; +#endif /* SUS || SU3 || S42 */ + +extern int circf, ceof, nbra, sed; + +struct yitem { + struct yitem *y_nxt; + wint_t y_oc; + wint_t y_yc; + char y_mc[MB_LEN_MAX]; +}; + +extern int ABUFSIZE; +extern int LBSIZE; +extern struct reptr **abuf; +extern int aptr; +extern char *genbuf; +extern int gbend; +extern int lbend; +extern int hend; +extern char *linebuf; +extern char *holdsp; +extern int nflag; +extern long long *tlno; + +enum cmd { + ACOM = 01, + BCOM = 020, + CCOM = 02, + CDCOM = 025, + CNCOM = 022, + COCOM = 017, + CPCOM = 023, + DCOM = 03, + ECOM = 015, + EQCOM = 013, + FCOM = 016, + GCOM = 027, + CGCOM = 030, + HCOM = 031, + CHCOM = 032, + ICOM = 04, + LCOM = 05, + NCOM = 012, + PCOM = 010, + QCOM = 011, + RCOM = 06, + SCOM = 07, + TCOM = 021, + WCOM = 014, + CWCOM = 024, + YCOM = 026, + XCOM = 033 +}; + +extern char *cp; + +#define P(n) ((n) > 0 ? &ptrspace[n - 1] : (struct reptr *)0) +#define L(n) ((n) > 0 ? &ltab[n - 1] : (struct label *)0) +#define A(n) ((n) > 0 ? 
&abuf[n - 1] : (struct reptr **)0) + +#define slno(ep, n) ( \ + *(ep)++ = ((n) & 0xff000000) >> 24, \ + *(ep)++ = ((n) & 0x00ff0000) >> 16, \ + *(ep)++ = ((n) & 0x0000ff00) >> 8, \ + *(ep)++ = ((n) & 0x000000ff) \ + ) + +#define glno(p) ( \ + ((p)[0]&0377) << 24 | \ + ((p)[1]&0377) << 16 | \ + ((p)[2]&0377) << 8 | \ + ((p)[3]&0377) \ + ) + +struct reptr { + char *ad1; + char *ad2; + union { + char *re1; + int lb1; + } bptr; + char *rhs; + FILE *fcode; + enum cmd command; + short gfl; + char pfl; + char inar; + char negfl; + char nsub; +}; + +extern struct reptr *ptrspace; + +struct label { + char asc[8*MB_LEN_MAX + 1]; + int chain; + int address; +}; + +extern int status; +extern int multibyte; +extern int invchar; +extern int needdol; + +extern int eargc; + +extern struct reptr *pending; +extern char *badp; + +extern void execute(const char *); +extern void fatal(const char *, ...); +extern void nonfatal(const char *, ...); +extern void aptr_inc(void); +extern wint_t wc_get(char **, int); +#define fetch(s) (multibyte ? wc_get(s, 1) : (*(*(s))++ & 0377)) +#define peek(s) (multibyte ? wc_get(s, 0) : (**(s) & 0377)) +extern struct yitem *ylook(wint_t , struct yitem **, int); +extern void *smalloc(size_t); +extern void growsp(const char *); diff --git a/sed/sed0.c b/sed/sed0.c @@ -0,0 +1,1266 @@ +/* from Unix 7th Edition sed */ +/* Sccsid @(#)sed0.c 1.64 (gritter) 3/12/05> */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include <unistd.h> +#include <stdlib.h> +#include <locale.h> +#include <libgen.h> +#include <stdarg.h> +#include <wchar.h> +#include "sed.h" + +int ABUFSIZE; +struct reptr **abuf; +int aptr; +char *genbuf; +int gbend; +int lbend; +int hend; +char *linebuf; +char *holdsp; +int nflag; +long long *tlno; +char *cp; + +int status; +int multibyte; +int invchar; +int needdol; + +int eargc; + +struct reptr *ptrspace; +struct reptr *pending; +char *badp; + +static const char CGMES[] = "\1command garbled: %s"; +static const char TMMES[] = "Too much text: %s"; +static const char LTL[] = "Label too long: %s"; +static const char LINTL[] = "line too long"; +static const char AD0MES[] = "No addresses allowed: %s"; +static const char AD1MES[] = "Only one address allowed: %s"; +static FILE **fcode; +static FILE *fin; +static char *lastre; +static wchar_t sed_seof; +static int PTRSIZE; +static int eflag; +static int gflag; +static int nlno; +static char **fname; +static int nfiles; +static int rep; +static struct label *ltab; +static int lab; +static size_t LABSIZE; +static int labtab = 1; +static int depth; +static char **eargv; +static int *cmpend; +static size_t DEPTH; +static char bad; +static char compfl; +static char *progname; +static char *(*ycomp)(char **); +static int executing; + +static void fcomp(void); +static char *compsub(char **, char *); +static int rline(void); +static char *address(char **); +static int cmp(const char *, const char *); +static void text(char **); +static int search(struct label *); +static void dechain(void); +static char *ycomp_sb(char **); +static char *ycomp_mb(char **); +static void lab_inc(void); +static void rep_inc(void); +static void depth_check(void); +static void *srealloc(void *, size_t); +static void *scalloc(size_t, size_t); +static char *sed_compile(char **); +static void wfile(void); +static void morefiles(void); + +static char *null; +#define check(p, buf, sz, incr, op) \ + if (&p[1] >= &(buf)[sz]) { \ + size_t ppos = p - 
buf; \ + size_t opos = op - buf; \ + buf = srealloc(buf, (sz += incr) * sizeof *(buf)); \ + p = &(buf)[ppos]; \ + if (op != NULL) \ + op = &(buf)[opos]; \ + } + +int +main(int argc, char **argv) +{ + int c; + const char optstr[] = "nf:e:g"; + + sed = 1; + progname = basename(argv[0]); + eargc = argc; + eargv = argv; + +#ifdef __GLIBC__ + putenv("POSIXLY_CORRECT=1"); +#endif /* __GLIBC__ */ +#if defined (SUS) || defined (SU3) || defined (S42) + setlocale(LC_COLLATE, ""); +#endif /* SUS || SU3 || S42 */ + setlocale(LC_CTYPE, ""); + multibyte = MB_CUR_MAX > 1; + ycomp = multibyte ? ycomp_mb : ycomp_sb; + badp = &bad; + aptr_inc(); + lab_inc(); + lab_inc(); /* 0 reserved for end-pointer -> labtab = 1 */ + growsp(NULL); + rep_inc(); + pending = 0; + depth = 0; + morefiles(); + fcode[0] = stdout; + nfiles = 1; + morefiles(); + + if(eargc == 1) + exit(0); + while ((c = getopt(eargc, eargv, optstr)) != EOF) { + switch (c) { + case 'n': + nflag++; + continue; + + case 'f': + if((fin = fopen(optarg, "r")) == NULL) + fatal("Cannot open pattern-file: %s", optarg); + + fcomp(); + fclose(fin); + continue; + + case 'e': + eflag++; + fcomp(); + eflag = 0; + continue; + + case 'g': + gflag++; + continue; + + default: + exit(2); + } + } + + eargv += optind, eargc -= optind; + + + if(compfl == 0 && *eargv) { + optarg = *eargv++; + eargc--; + eflag++; + fcomp(); + eflag = 0; + } + + if(depth) + fatal("Too many {'s"); + + L(labtab)->address = rep; + + dechain(); + +/* abort(); */ /*DEBUG*/ + + executing++; + if(eargc <= 0) + execute((char *)NULL); + else while(--eargc >= 0) { + execute(*eargv++); + } + fclose(stdout); + return status; +} + +static void +fcomp(void) +{ + + register char *op, *tp, *q; + int pt, pt1; + int lpt; + + compfl = 1; + op = lastre; + + if(rline() < 0) return; + if(*linebuf == '#') { + if(linebuf[1] == 'n') + nflag = 1; + } + else { + cp = linebuf; + goto comploop; + } + + for(;;) { + if(rline() < 0) break; + + cp = linebuf; + +comploop: +/* fprintf(stdout, "cp: 
%s\n", cp); */ /*DEBUG*/ + while(*cp == ' ' || *cp == '\t') cp++; + if(*cp == '\0' || *cp == '#') continue; + if(*cp == ';') { + cp++; + goto comploop; + } + + q = address(&P(rep)->ad1); + if(q == badp) + fatal(CGMES, linebuf); + + if(q != 0 && q == P(rep)->ad1) { + if(op) + P(rep)->ad1 = op; + else + fatal("First RE may not be null"); + } else if(q == 0) { + P(rep)->ad1 = 0; + } else { + op = P(rep)->ad1; + if(*cp == ',' || *cp == ';') { + cp++; + q = address(&P(rep)->ad2); + if(q == badp || q == 0) + fatal(CGMES, linebuf); + if(q == P(rep)->ad2) + P(rep)->ad2 = op; + else + op = P(rep)->ad2; + + } else + P(rep)->ad2 = 0; + } + + while(*cp == ' ' || *cp == '\t') cp++; + +swit: + switch(*cp++) { + + default: + fatal("Unrecognized command: %s", linebuf); + /*NOTREACHED*/ + + case '!': + P(rep)->negfl = 1; + goto swit; + + case '{': + P(rep)->command = BCOM; + P(rep)->negfl = !(P(rep)->negfl); + depth_check(); + cmpend[depth++] = rep; + rep_inc(); + if(*cp == '\0') continue; + + goto comploop; + + case '}': + if(P(rep)->ad1) + fatal(AD0MES, linebuf); + + if(--depth < 0) + fatal("Too many }'s"); + P(cmpend[depth])->bptr.lb1 = rep; + + continue; + + case '=': + P(rep)->command = EQCOM; + if(P(rep)->ad2) + fatal(AD1MES, linebuf); + break; + + case ':': + if(P(rep)->ad1) + fatal(AD0MES, linebuf); + + while(*cp++ == ' '); + cp--; + + + tp = L(lab)->asc; + while((*tp++ = *cp++)) + if(tp >= &(L(lab)->asc[sizeof + L(lab)->asc])) + fatal(LTL, linebuf); + *--tp = '\0'; + + if(lpt = search(L(lab))) { + if(L(lpt)->address) + fatal("Duplicate labels: %s", + linebuf); + } else { + L(lab)->chain = 0; + lpt = lab; + lab_inc(); + } + L(lpt)->address = rep; + + continue; + + case 'a': + P(rep)->command = ACOM; + if(P(rep)->ad2) + fatal(AD1MES, linebuf); + if(*cp == '\\') cp++; + if(*cp++ != '\n') + fatal(CGMES, linebuf); + text(&P(rep)->bptr.re1); + break; + case 'c': + P(rep)->command = CCOM; + if(*cp == '\\') cp++; + if(*cp++ != ('\n')) + fatal(CGMES, linebuf); + 
text(&P(rep)->bptr.re1); + needdol = 1; + break; + case 'i': + P(rep)->command = ICOM; + if(P(rep)->ad2) + fatal(AD1MES, linebuf); + if(*cp == '\\') cp++; + if(*cp++ != ('\n')) + fatal(CGMES, linebuf); + text(&P(rep)->bptr.re1); + break; + + case 'g': + P(rep)->command = GCOM; + break; + + case 'G': + P(rep)->command = CGCOM; + break; + + case 'h': + P(rep)->command = HCOM; + break; + + case 'H': + P(rep)->command = CHCOM; + break; + + case 't': + P(rep)->command = TCOM; + goto jtcommon; + + case 'b': + P(rep)->command = BCOM; +jtcommon: + while(*cp++ == ' '); + cp--; + + if(*cp == '\0') { + if((pt = L(labtab)->chain) != 0) { + while((pt1 = P(pt)->bptr.lb1) != 0) + pt = pt1; + P(pt)->bptr.lb1 = rep; + } else + L(labtab)->chain = rep; + break; + } + tp = L(lab)->asc; + while((*tp++ = *cp++)) + if(tp >= &(L(lab)->asc[sizeof + L(lab)->asc])) + fatal(LTL, linebuf); + cp--; + *--tp = '\0'; + + if(lpt = search(L(lab))) { + if(L(lpt)->address) { + P(rep)->bptr.lb1 = L(lpt)->address; + } else { + pt = L(lpt)->chain; + while((pt1 = P(pt)->bptr.lb1) != 0) + pt = pt1; + P(pt)->bptr.lb1 = rep; + } + } else { + L(lab)->chain = rep; + L(lab)->address = 0; + lab_inc(); + } + break; + + case 'n': + P(rep)->command = NCOM; + break; + + case 'N': + P(rep)->command = CNCOM; + break; + + case 'p': + P(rep)->command = PCOM; + break; + + case 'P': + P(rep)->command = CPCOM; + break; + + case 'r': + P(rep)->command = RCOM; + if(P(rep)->ad2) + fatal(AD1MES, linebuf); +#if !defined (SUS) && !defined (SU3) + if(*cp++ != ' ') + fatal(CGMES, linebuf); +#else /* SUS, SU3 */ + while (*cp == ' ' || *cp == '\t') + cp++; +#endif /* SUS, SU3 */ + text(&P(rep)->bptr.re1); + break; + + case 'd': + P(rep)->command = DCOM; + break; + + case 'D': + P(rep)->command = CDCOM; + P(rep)->bptr.lb1 = 1; + break; + + case 'q': + P(rep)->command = QCOM; + if(P(rep)->ad2) + fatal(AD1MES, linebuf); + break; + + case 'l': + P(rep)->command = LCOM; + break; + + case 's': + P(rep)->command = SCOM; + sed_seof = 
fetch(&cp); + q = sed_compile(&P(rep)->bptr.re1); + if(q == badp) + fatal(CGMES, linebuf); + if(q == P(rep)->bptr.re1) { + if (op == NULL) + fatal("First RE may not be null"); + P(rep)->bptr.re1 = op; + } else { + op = P(rep)->bptr.re1; + } + + if(compsub(&P(rep)->rhs, &P(rep)->nsub) == badp) + fatal(CGMES, linebuf); + sloop: if(*cp == 'g') { + cp++; + P(rep)->gfl = -1; + goto sloop; + } else if(gflag) + P(rep)->gfl = -1; + if (*cp >= '0' && *cp <= '9') { + while (*cp >= '0' && *cp <= '9') { + if (P(rep)->gfl == -1) + P(rep)->gfl = 0; + P(rep)->gfl = P(rep)->gfl * 10 + + *cp++ - '0'; + } + goto sloop; + } +#if !defined (SUS) && !defined (SU3) + if (P(rep)->gfl > 0 && P(rep)->gfl > 512) + fatal("Suffix too large - 512 max: %s", linebuf); +#endif + + if(*cp == 'p') { + cp++; + P(rep)->pfl = 1; + goto sloop; + } + + if(*cp == 'P') { + cp++; + P(rep)->pfl = 2; + goto sloop; + } + + if(*cp == 'w') { + cp++; + wfile(); + } + break; + + case 'w': + P(rep)->command = WCOM; + wfile(); + break; + + case 'x': + P(rep)->command = XCOM; + break; + + case 'y': + P(rep)->command = YCOM; + sed_seof = fetch(&cp); + if (ycomp(&P(rep)->bptr.re1) == badp) + fatal(CGMES, linebuf); + break; + + } + rep_inc(); + + if(*cp++ != '\0') { + if(cp[-1] == ';') + goto comploop; + fatal(CGMES, linebuf); + } + + } + P(rep)->command = 0; + lastre = op; +} + +static char * +compsub(char **rhsbuf, char *nsubp) +{ + register char *p, *op, *oq; + char *q; + wint_t c; + size_t sz = 32; + + *rhsbuf = smalloc(sz); + p = *rhsbuf; + q = cp; + *nsubp = 0; + for(;;) { + op = p; + oq = q; + if((c = fetch(&q)) == '\\') { + check(p, *rhsbuf, sz, 32, op) + *p = '\\'; + oq = q; + c = fetch(&q); + do { + check(p, *rhsbuf, sz, 32, op) + *++p = *oq++; + } while (oq < q); + if(c > nbra + '0' && c <= '9') + return(badp); + if (c > *nsubp + '0' && c <= '9') + *nsubp = c - '0'; + check(p, *rhsbuf, sz, 32, op) + p++; + continue; + } else { + do { + check(p, *rhsbuf, sz, 32, op) + *p++ = *oq++; + } while (oq < q); + p--; + 
} + if(c == sed_seof) { + check(p, *rhsbuf, sz, 32, op) + *op++ = '\0'; + cp = q; + return(op); + } + check(p, *rhsbuf, sz, 32, op) + if(*p++ == '\0') { + return(badp); + } + + } +} + +#define rlinechk() if (c >= lbend-2) \ + growsp(LINTL) + +static int +rline(void) +{ + register char *q; + register int c; + register int t; + static char *saveq; + + c = -1; + + if(eflag) { + if(eflag > 0) { + eflag = -1; + q = optarg; + rlinechk(); + while(linebuf[++c] = *q++) { + rlinechk(); + if(linebuf[c] == '\\') { + if((linebuf[++c] = *q++) == '\0') { + rlinechk(); + saveq = 0; + return(-1); + } else + continue; + } + if(linebuf[c] == '\n') { + linebuf[c] = '\0'; + saveq = q; + return(1); + } + } + saveq = 0; + return(1); + } + if((q = saveq) == 0) return(-1); + + while(linebuf[++c] = *q++) { + rlinechk(); + if(linebuf[c] == '\\') { + if((linebuf[++c] = *q++) == '0') { + rlinechk(); + saveq = 0; + return(-1); + } else + continue; + } + if(linebuf[c] == '\n') { + linebuf[c] = '\0'; + saveq = q; + return(1); + } + } + saveq = 0; + return(1); + } + + while((t = getc(fin)) != EOF) { + rlinechk(); + linebuf[++c] = (char)t; + if(linebuf[c] == '\\') { + t = getc(fin); + rlinechk(); + linebuf[++c] = (char)t; + } + else if(linebuf[c] == '\n') { + linebuf[c] = '\0'; + return(1); + } + } + linebuf[++c] = '\0'; + return(-1); +} + +static char * +address(char **expbuf) +{ + register char *rcp, *ep; + long long lno; + + *expbuf = NULL; + if(*cp == '$') { + cp++; + ep = *expbuf = smalloc(2 * sizeof *expbuf); + *ep++ = CEND; + *ep++ = ceof; + needdol = 1; + return(ep); + } + + if(*cp == '/' || *cp == '\\') { + if (*cp == '\\') + cp++; + sed_seof = fetch(&cp); + return(sed_compile(expbuf)); + } + + rcp = cp; + lno = 0; + + while(*rcp >= '0' && *rcp <= '9') + lno = lno*10 + *rcp++ - '0'; + + if(rcp > cp) { + if (nlno > 020000000000 || + (tlno = realloc(tlno, (nlno+1)*sizeof *tlno)) == NULL) + fatal("Too many line numbers"); + ep = *expbuf = smalloc(6 * sizeof *expbuf); + *ep++ = CLNUM; + 
slno(ep, nlno); + tlno[nlno++] = lno; + *ep++ = ceof; + cp = rcp; + return(ep); + } + return(0); +} + +static int +cmp(const char *a, const char *b) +{ + register const char *ra, *rb; + + ra = a - 1; + rb = b - 1; + + while(*++ra == *++rb) + if(*ra == '\0') return(0); + return(1); +} + +static void +text(char **textbuf) +{ + register char *p, *oq; + char *q; + size_t sz = 128; + + *textbuf = smalloc(sz); + p = *textbuf; + q = cp; + for(;;) { + + oq = q; + if(fetch(&q) == '\\') { + oq = q; + fetch(&q); + } + while(oq < q) + *p++ = *oq++; + if(p[-1] == '\0') { + cp = --q; + return; + } + check(p, *textbuf, sz, 128, null) + } +} + +static int +search(struct label *ptr) +{ + struct label *rp; + + rp = L(labtab); + while(rp < ptr) { + if(cmp(rp->asc, ptr->asc) == 0) + return(rp - L(labtab) + 1); + rp++; + } + + return(0); +} + + +static void +dechain(void) +{ + struct label *lptr; + int rptr, trptr; + + for(lptr = L(labtab); lptr < L(lab); lptr++) { + + if(lptr->address == 0) + fatal("Undefined label: %s", lptr->asc); + + if(lptr->chain) { + rptr = lptr->chain; + while((trptr = P(rptr)->bptr.lb1) != 0) { + P(rptr)->bptr.lb1 = lptr->address; + rptr = trptr; + } + P(rptr)->bptr.lb1 = lptr->address; + } + } +} + +static char * +ycomp_sb(char **expbuf) +{ + register int c, d; + register char *ep, *tsp; + char *sp; + + *expbuf = smalloc(0400); + ep = *expbuf; + for(c = 0; !(c & 0400); c++) + ep[c] = '\0'; + sp = cp; + for(tsp = cp; *tsp != sed_seof; tsp++) { + if(*tsp == '\\') + tsp++; + if(*tsp == '\n' || *tsp == '\0') + return(badp); + } + tsp++; + + while((c = *sp++ & 0377) != sed_seof) { + if(c == '\\') { + c = *sp == 'n' ? '\n' : *sp; + sp++; + } + if((ep[c] = d = *tsp++ & 0377) == '\\') { + ep[c] = *tsp == 'n' ? 
'\n' : *tsp; + tsp++; + } + if(d != '\\' && ep[c] == sed_seof || ep[c] == '\0') + return(badp); + } + if(*tsp != sed_seof) + return(badp); + cp = ++tsp; + + for(c = 0; !(c & 0400); c++) + if(ep[c] == 0) + ep[c] = (char)c; + + return(ep + 0400); +} + +static char * +ycomp_mb(char **expbuf) +{ + struct yitem **yt, *yp; + register wint_t c, d; + char *otsp, *tsp, *sp, *mp; + + tsp = sp = cp; + while ((c = fetch(&tsp)) != sed_seof) { + if (c == '\\') + c = fetch(&tsp); + if (c == '\n' || c == '\0') + return badp; + } + yt = scalloc(200, sizeof *yt); + while ((c = fetch(&sp)) != sed_seof) { + if (c == '\\') { + if ((d = fetch(&sp)) == 'n') + c = '\n'; + else + c = d; + } + otsp = tsp; + d = fetch(&tsp); + yp = ylook(c, yt, 1); + yp->y_oc = c; + if ((yp->y_yc = d) == '\\') { + otsp = tsp; + if ((c = fetch(&tsp)) == 'n') + yp->y_yc = '\n'; + else + yp->y_yc = c; + } + if (d != '\\' && yp->y_yc == sed_seof || yp->y_yc == '\0') + return badp; + mp = yp->y_mc; + if (yp->y_yc != '\n') + while (otsp < tsp) + *mp++ = *otsp++; + else + *mp++ = '\n'; + *mp = '\0'; + } + if (fetch(&tsp) != sed_seof) + return badp; + cp = tsp; + *expbuf = (char *)yt; + return &(*expbuf)[1]; +} + +static void +rep_inc(void) +{ + register char *p; + const int chunk = 16; + + if (++rep >= PTRSIZE) { + ptrspace = srealloc(ptrspace, + (PTRSIZE += chunk) * sizeof *ptrspace); + for (p = (char *)&ptrspace[PTRSIZE - chunk]; + p < (char *)&ptrspace[PTRSIZE]; p++) + *p = '\0'; + } +} + +static void +lab_inc(void) +{ + register char *p; + const int chunk = 8; + + if (++lab >= LABSIZE) { + ltab = srealloc(ltab, (LABSIZE += chunk) * sizeof *ltab); + for (p = (char *)&ltab[LABSIZE - chunk]; + p < (char *)&ltab[LABSIZE]; p++) + *p = '\0'; + } +} + +void +aptr_inc(void) +{ + register char *p; + const int chunk = 8; + + if (++aptr > ABUFSIZE) { + abuf = srealloc(abuf, (ABUFSIZE += chunk) * sizeof *abuf); + for (p = (char *)&abuf[ABUFSIZE - chunk]; + p < (char *)&abuf[ABUFSIZE]; p++) + *p = '\0'; + } +} + +static 
void +depth_check(void) +{ + if (depth + 1 > DEPTH) + cmpend = srealloc(cmpend, (DEPTH += 8) * sizeof *cmpend); +} + +void +nonfatal(const char *afmt, ...) +{ + va_list ap; + const char *fmt; + + if (*afmt == '\1') { + fprintf(stderr, "%s: ", progname); + fmt = &afmt[1]; + } else + fmt = afmt; + va_start(ap, afmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); + status |= 1; +} + +void +fatal(const char *afmt, ...) +{ + va_list ap; + const char *fmt; + + if (*afmt == '\1') { + fprintf(stderr, "%s: ", progname); + fmt = &afmt[1]; + } else + fmt = afmt; + va_start(ap, afmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); + exit(2); +} + +static void * +srealloc(void *vp, size_t nbytes) +{ + void *p; + + if ((p = realloc(vp, nbytes)) == NULL) + fatal(TMMES, linebuf); + return p; +} + +void * +smalloc(size_t nbytes) +{ + return srealloc(NULL, nbytes); +} + +static void * +scalloc(size_t nmemb, size_t size) +{ + void *p; + + if ((p = calloc(nmemb, size)) == NULL) + fatal(TMMES, linebuf); + return p; +} + +#if defined (SUS) || defined (SU3) || defined (S42) +static char * +sed_compile(char **ep) +{ + struct re_emu *re; + static char *pat; + static size_t patsz; + register char *p, *oc; + wint_t c, d; + + if (*cp != sed_seof) + nbra = 0; + if (patsz == 0) + pat = smalloc(patsz = 32); + p = pat; + do { + oc = cp; + if ((c = fetch(&cp)) == sed_seof) + *p = '\0'; + else if (c == '\\') { + oc = cp; + if ((c = fetch(&cp)) == 'n') + *p = '\n'; + else { + check(p, pat, patsz, 32, null); + *p++ = '\\'; + if (c == '(') + nbra++; + goto normchar; + } + } else if (c == '[') { + check(p, pat, patsz, 32, null); + *p++ = c; + d = WEOF; + do { + oc = cp; + c = fetch(&cp); + if (c == '\0') + goto normchar; + do { + check(p, pat, patsz, 32, null); + *p++ = *oc++; + } while (oc < cp); + if (d == '[' && (c == ':' || c == '.' 
|| + c == '=')) { + d = c; + do { + oc = cp; + c = fetch(&cp); + if (c == '\0') + goto normchar; + do { + check(p, pat, patsz,32, + null); + *p++ = *oc++; + } while (oc < cp); + } while (c != d || peek(&cp) != ']'); + oc = cp; + c = fetch(&cp); + do { + check(p, pat, patsz, 32, null); + *p++ = *oc++; + } while (oc < cp); + c = WEOF; /* == reset d and continue */ + } + d = c; + } while (c != ']'); + p--; + } else { + normchar: do { + check(p, pat, patsz, 32, null) + *p++ = *oc++; + } while (oc < cp); + p--; + } + check(p, pat, patsz, 32, null); + } while (*p++ != '\0'); + re = scalloc(1, sizeof *re); + *ep = (char *)re; + if (*pat == '^') + **ep = 1; + if (*pat != '\0') { + int reflags = 0; + +#ifdef REG_ANGLES + reflags |= REG_ANGLES; +#endif /* REG_ANGLES */ +#if defined (SU3) && defined (REG_AVOIDNULL) + reflags |= REG_AVOIDNULL; +#endif /* SU3 && AVOIDNULL */ + if (regcomp(&re->r_preg, pat, reflags) != 0) + re = (struct re_emu *)badp; + } else + **ep = 2; + p = (char *)re; + if (p != badp && *pat) + p++; + return p; +} +#else /* !SUS, !SU3, !S42 */ +static char * +sed_compile(char **ep) +{ + extern char *compile(char *, char *, char *, int); + register char *p; + size_t sz; + + for (sz = 0, p = cp; *p; p++) + if (*p == '[') + sz += 32; + sz += 2 * (p - cp) + 5; + *ep = smalloc(sz); + (*ep)[1] = '\0'; + p = compile(NULL, &(*ep)[1], &(*ep)[sz], sed_seof); + if (p == &(*ep)[1]) + return *ep; + **ep = circf; + return p; +} +#endif /* !SUS, !SU3, !S42 */ + +wint_t +wc_get(char **sc, int move) +{ + wint_t c; + char *p = *sc; + wchar_t wcbuf; + int len; + + if ((*p & 0200) == 0) { + c = *p; + p += (len = 1); + invchar = 0; + } else if ((len = mbtowc(&wcbuf, p, MB_LEN_MAX)) < 0) { + if (!executing) + fatal("invalid multibyte character: %s", p); + c = (*p++ & 0377); + mbtowc(NULL, NULL, 0); + invchar = 1; + } else if (len == 0) { + c = '\0'; + p++; + invchar = 0; + } else { + c = wcbuf; + p += len; + invchar = 0; + } + if (move) + *sc = p; + return c; +} + +/* + * Note 
that this hash is not optimized to distribute the items + * equally to all buckets. y commands typically handle only a + * small part of the alphabet, thus most characters will have + * no entry in the hash table. If no list exists in the bucket + * for the hash of these characters, the function can return + * quickly. + */ +#define yhash(c) (c & 0177) + +struct yitem * +ylook(wint_t c, struct yitem **yt, int make) +{ + struct yitem *yp; + int h; + + yp = yt[h = yhash(c)]; + while (yp != NULL) { + if (yp->y_oc == c) + break; + yp = yp->y_nxt; + } + if (make && yp == NULL) { + yp = scalloc(1, sizeof *yp); + yp->y_oc = c; + yp->y_nxt = yt[h]; + yt[h] = yp; + } + return yp; +} + +void +growsp(const char *msg) +{ + const int incr = 128; + int olbend, ogbend, ohend; + + olbend = lbend; + ogbend = gbend; + ohend = hend; + if ((linebuf = realloc(linebuf, lbend += incr)) == NULL || + (genbuf = realloc(genbuf, gbend += incr)) == NULL || + (holdsp = realloc(holdsp, hend += incr)) == NULL) + fatal(msg ? 
msg : "Cannot malloc space"); + while (olbend < lbend) + linebuf[olbend++] = '\0'; + while (ogbend < gbend) + genbuf[ogbend++] = '\0'; + while (ohend < hend) + holdsp[ohend++] = '\0'; +} + +static void +wfile(void) +{ + int i; + +#if !defined (SUS) && !defined (SU3) + if(*cp++ != ' ') + fatal(CGMES, linebuf); +#else /* SUS, SU3 */ + while (*cp == ' ' || *cp == '\t') + cp++; +#endif /* SUS, SU3 */ + + text(&fname[nfiles]); + for(i = nfiles - 1; i >= 0; i--) + if(fname[i] != NULL && cmp(fname[nfiles], fname[i]) == 0) { + P(rep)->fcode = fcode[i]; + free(fname[nfiles]); + return; + } + + if((P(rep)->fcode = fopen(fname[nfiles], "w")) == NULL) + fatal("Cannot create %s", fname[nfiles]); + fcode[nfiles++] = P(rep)->fcode; + morefiles(); +} + +static void +morefiles(void) +{ + if ((fname = realloc(fname, (nfiles+1) * sizeof *fname)) == 0 || + (fcode = realloc(fcode, (nfiles+1) * sizeof *fcode)) == 0) + fatal("Too many files in w commands"); + fname[nfiles] = 0; + fcode[nfiles] = 0; +} diff --git a/sed/sed1.c b/sed/sed1.c @@ -0,0 +1,917 @@ +/* from Unix 7th Edition sed */ +/* Sccsid @(#)sed1.c 1.42 (gritter) 2/6/05> */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. 
+ * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <ctype.h> +#include <wchar.h> +#include <wctype.h> +#include "sed.h" + +#if !defined (SUS) && !defined (SU3) && !defined(S42) +#define INIT extern char *cp, *badp; \ + register char *sp = cp; +#define GETC() (*sp++) +#define PEEKC() (*sp) +#define UNGETC(c) (--sp) +#define RETURN(c) { cp = sp; return ep; } +#define ERROR(c) { cp = sp; return badp; } + +#define regexp_h_malloc(n) smalloc(n) +#include <regexp.h> +#endif /* !SUS && !SU3 && !S42 */ + +#ifndef CCEOF +#ifdef CEOF +#define CCEOF CEOF +#else /* !CEOF */ +#define CCEOF 22 +#endif /* !CEOF */ +#endif /* !CCEOF */ + +int ceof = CCEOF; + +#if !defined (SUS) && !defined (SU3) +static const char *trans[] = { + "\\00", + "\\01", + "\\02", + "\\03", + "\\04", + "\\05", + "\\06", + "\\07", + "-<", + "->", + "\n", + "\\13", + "\\14", + "\\15", + "\\16", + "\\17", + "\\20", + "\\21", + "\\22", + "\\23", + "\\24", + "\\25", + "\\26", + "\\27", + "\\30", + "\\31", + "\\32", + "\\33", + "\\34", + "\\35", + "\\36", + "\\37" +}; +#endif /* !SUS, !SU3 */ + +static char *cbp; +static char *ebp; +static int dolflag; +static int sflag; +static int jflag; +static int delflag; +static long long lnum; +static char ibuf[512]; +static int ibrd; +static int mflag; +static int f = -1; +static int spend; +static int genend; +static int hspend; + +static void command(struct reptr *); +static int match(char *, int, int); +static int substitute(struct reptr *); +static void dosub(char *); +static int place(int, int, int); +static int gline(int); +static void arout(void); +static void lcom(wint_t, int); +static void oout(int); +static void mout(const char *); +static void nout(wint_t); +static void wout(wint_t); +static void lout(int); + +#if defined (SUS) || defined (SU3) || defined (S42) +#define NBRA 9 +int sed; +int nbra; +int circf; +static char *braslist[NBRA]; +static char *braelist[NBRA]; +static char 
*loc1, *loc2, *locs; + +static int +step(char *line, char *pattern) +{ + struct re_emu *re = (struct re_emu *)&pattern[-1]; + regmatch_t bralist[NBRA+1]; + int eflag = 0; + int res; + int i, nsub; + + if (circf == 2) /* empty pattern */ + return 0; + if (locs) + eflag |= REG_NOTBOL; + /* + * Don't fetch more match locations than necessary since this + * might prevent use of DFA. + */ + nsub = mflag; + if ((res = regexec(&re->r_preg, line, nsub, bralist, eflag)) == 0) { + if (nsub > 0) { + loc1 = line + bralist[0].rm_so; + loc2 = line + bralist[0].rm_eo; + for (i = 1; i < nsub; i++) { + if (bralist[i].rm_so != -1) { + braslist[i-1] = line + bralist[i].rm_so; + braelist[i-1] = line + bralist[i].rm_eo; + } else + braslist[i-1] = braelist[i-1] = NULL; + } + } + } + return res == 0; +} +#endif /* SUS || SU3 || S42 */ + +static int lcomlen; +static int Braslist[NBRA]; +static int Braelist[NBRA]; +static int Loc1, Loc2; + +void +execute(const char *file) +{ + register char *p1, *p2; + register struct reptr *ipc; + int c; + int execc; + + if (f >= 0) + close(f); + if (file) { + if ((f = open(file, O_RDONLY)) < 0) { + nonfatal("Can't open %s", file); + return; + } + } else + f = 0; + + ebp = ibuf; + cbp = ibuf; + + if(pending) { + ipc = pending; + pending = 0; + goto yes; + } + + for(;;) { + if((execc = gline(0)) < 0) { + if (f >= 0) { + close(f); + f = -1; + } + return; + } + spend = execc; + + for(ipc = ptrspace; ipc->command; ) { + + p1 = ipc->ad1; + p2 = ipc->ad2; + + if(p1) { + + if(ipc->inar) { + if(*p2 == CEND) { + p1 = 0; + } else if(*p2 == CLNUM) { + c = glno(&p2[1]); + if(lnum > tlno[c]) { + ipc->inar = 0; + if(ipc->negfl) + goto yes; + ipc++; + continue; + } + if(lnum == tlno[c]) { + ipc->inar = 0; + } + } else if(match(p2, 0, 0)) { + ipc->inar = 0; + } + } else if(*p1 == CEND) { + if(!dolflag) { + if(ipc->negfl) + goto yes; + ipc++; + continue; + } + + } else if(*p1 == CLNUM) { + c = glno(&p1[1]); + if(lnum != tlno[c]) { + if(ipc->negfl) + goto yes; + ipc++; + 
continue; + } + if(p2) { + ipc->inar = 1; +#if defined (SUS) || defined (SU3) + goto ichk; +#endif /* SUS, SU3 */ + } + } else if(match(p1, 0, 0)) { + if(p2) { + ipc->inar = 1; +#if defined (SUS) || defined (SU3) + ichk: if (*p2 == CLNUM) { + c = glno(&p2[1]); + if (lnum >= tlno[c]) + ipc->inar = 0; + } +#endif /* SUS, SU3 */ + } + } else { + if(ipc->negfl) + goto yes; + ipc++; + continue; + } + } + + if(ipc->negfl) { + ipc++; + continue; + } + yes: + command(ipc); + + if(delflag) + break; + + if(jflag) { + jflag = 0; + if((ipc = P(ipc->bptr.lb1)) == 0) { + ipc = ptrspace; + break; + } + } else + ipc++; + + } + if(!nflag && !delflag) { + for(p1 = linebuf; p1 < &linebuf[spend]; p1++) + putc(*p1&0377, stdout); + putc('\n', stdout); + } + + if(A(aptr) > abuf) { + arout(); + } + + delflag = 0; + + } +} + +static int +match(char *expbuf, int gf, int needloc) +{ + register char *p1; + int i, val; + + if(gf) { + if(*expbuf) return(0); +#if defined (SUS) || defined (SU3) || defined (S42) + if (loc1 == loc2) { + int n; + wchar_t wc; + if (multibyte && (n = mbtowc(&wc, &linebuf[Loc2], + MB_LEN_MAX)) > 0) + Loc2 += n; + else + Loc2++; + } +#endif + locs = p1 = loc2 = &linebuf[Loc2]; + } else { + p1 = linebuf; + locs = 0; + } + + mflag = needloc; + circf = *expbuf++; + val = step(p1, expbuf); + for (i = 0; i < NBRA; i++) { + Braslist[i] = braslist[i] - linebuf; + Braelist[i] = braelist[i] - linebuf; + } + Loc1 = loc1 - linebuf; + Loc2 = loc2 - linebuf; + return val; +} + +static int +substitute(struct reptr *ipc) +{ + int matchcnt = 1; + + if (match(ipc->bptr.re1, 0, ipc->nsub + 1) == 0) + return(0); + + sflag = 0; + if (ipc->gfl >= -1 && ipc->gfl <= 1) + dosub(ipc->rhs); + + if(ipc->gfl != 0) { + while(linebuf[Loc2]) { + if(match(ipc->bptr.re1, 1, ipc->nsub + 1) == 0) + break; + matchcnt++; + if (ipc->gfl == -1 || ipc->gfl == matchcnt) + dosub(ipc->rhs); + } + } + return(1); +} + +static void +dosub(char *rhsbuf) +{ + register int lc, sc; + register char *rp; + int c; + + 
sflag = 1; + lc = 0; /*linebuf*/ + sc = 0; /*genbuf*/ + rp = rhsbuf; + while (lc < Loc1) + genbuf[sc++] = linebuf[lc++]; + while((c = *rp++) != 0) { + if (c == '&') { + sc = place(sc, Loc1, Loc2); + continue; + } else if (c == '\\') { + c = *rp++; + if (c >= '1' && c < NBRA+'1') { + sc = place(sc, Braslist[c-'1'], + Braelist[c-'1']); + continue; + } + } + if (sc >= gbend) + growsp("output line too long."); + genbuf[sc++] = (char)c; + } + lc = Loc2; + Loc2 = sc; + do { + if (sc >= gbend) + growsp("Output line too long."); + } while (genbuf[sc++] = linebuf[lc++], lc <= spend); + genend = sc-1; + lc = 0; /*linebuf*/ + sc = 0; /*genbuf*/ + while (linebuf[lc++] = genbuf[sc++], sc <= genend); + spend = lc-1; +} + +static int +place(int asc, int al1, int al2) +{ + register int sc; + register int l1, l2; + + sc = asc; + l1 = al1; + l2 = al2; + while (l1 < l2) { + if (sc >= gbend) + growsp("Output line too long."); + genbuf[sc++] = linebuf[l1++]; + } + return(sc); +} + +static void +command(struct reptr *ipc) +{ + register int i; + wint_t c; + register char *p1, *p2; + int k1, k2, k3; + char *lp; + int execc; + + + switch(ipc->command) { + + case ACOM: + *A(aptr) = ipc; + aptr_inc(); + *A(aptr) = 0; + break; + + case CCOM: + delflag = 1; + if(!ipc->inar || dolflag) { + for(p1 = ipc->bptr.re1; *p1; ) { + putc(*p1&0377, stdout); + p1++; + } + putc('\n', stdout); + } + break; + case DCOM: + delflag++; + break; + case CDCOM: + p1 = p2 = linebuf; + + while(*p1 != '\n') { + if(p1++ == &linebuf[spend]) { + delflag++; + return; + } + } + + p1++; + while(*p2++ = *p1++, p1 <= &linebuf[spend]); + spend = p2-1 - linebuf; + jflag++; + break; + + case EQCOM: + fprintf(stdout, "%lld\n", lnum); + break; + + case GCOM: + p1 = linebuf; + p2 = holdsp; + while(*p1++ = *p2++, p2 <= &holdsp[hspend]); + spend = p1-1 - linebuf; + break; + + case CGCOM: + linebuf[spend++] = '\n'; + k1 = spend; + k2 = 0; /*holdsp*/ + do { + if(k1 >= lbend) + growsp(NULL); + } while(linebuf[k1++] = holdsp[k2++], k2 
<= hspend); + spend = k1-1; + break; + + case HCOM: + p1 = holdsp; + p2 = linebuf; + while(*p1++ = *p2++, p2 <= &linebuf[spend]); + hspend = p1-1 - holdsp; + break; + + case CHCOM: + holdsp[hspend++] = '\n'; + k1 = hspend; + k2 = 0; /*linebuf*/ + do { + if(k1 >= hend) + growsp("\1hold space overflow !"); + } while(holdsp[k1++] = linebuf[k2++], k2 <= spend); + hspend = k1-1; + break; + + case ICOM: + for(p1 = ipc->bptr.re1; *p1; ) { + putc(*p1&0377, stdout); + p1++; + } + putc('\n', stdout); + break; + + case BCOM: + jflag = 1; + break; + + case LCOM: + lp = linebuf; + lcomlen = 0; + while (lp < &linebuf[spend]) { + c = fetch(&lp); + lcom(c, invchar == 0); + } +#if defined (SUS) || defined (SU3) + putc('$', stdout); +#endif /* SUS, SU3 */ + putc('\n', stdout); + break; + + case NCOM: + if(!nflag) { + for(p1 = linebuf; p1 < &linebuf[spend]; p1++) + putc(*p1&0377, stdout); + putc('\n', stdout); + } + + if(A(aptr) > abuf) + arout(); + if((execc = gline(0)) < 0) { + pending = ipc; + delflag = 1; + break; + } + spend = execc; + + break; + case CNCOM: + if(A(aptr) > abuf) + arout(); + linebuf[spend++] = '\n'; + if((execc = gline(spend)) < 0) { + pending = ipc; + delflag = 1; + break; + } + spend = execc; + break; + + case PCOM: + for(p1 = linebuf; p1 < &linebuf[spend]; p1++) + putc(*p1&0377, stdout); + putc('\n', stdout); + break; + case CPCOM: + cpcom: + for(p1 = linebuf; *p1 != '\n' && p1<&linebuf[spend]; ) { + putc(*p1&0377, stdout); + p1++; + } + putc('\n', stdout); + break; + + case QCOM: + if(!nflag) { + for(p1 = linebuf; p1 < &linebuf[spend]; p1++) + putc(*p1&0377, stdout); + putc('\n', stdout); + } + if(A(aptr) > abuf) arout(); + fclose(stdout); + if (ibrd > 0) + lseek(f, -ibrd, SEEK_CUR); + exit(0); + case RCOM: + + *A(aptr) = ipc; + aptr_inc(); + *A(aptr) = 0; + + break; + + case SCOM: + i = substitute(ipc); + if(ipc->pfl && i) + if(ipc->pfl == 1) { + for(p1 = linebuf; p1 < &linebuf[spend]; + p1++) + putc(*p1&0377, stdout); + putc('\n', stdout); + } + else + 
goto cpcom; + if(i && ipc->fcode) + goto wcom; + break; + + case TCOM: + if(sflag == 0) break; + sflag = 0; + jflag = 1; + break; + + wcom: + case WCOM: + fprintf(ipc->fcode, "%s\n", linebuf); + break; + case XCOM: + p1 = linebuf; + p2 = genbuf; + while(*p2++ = *p1++, p1 <= &linebuf[spend]); + genend = p2-1 - genbuf; + p1 = holdsp; + p2 = linebuf; + while(*p2++ = *p1++, p1 <= &holdsp[hspend]); + spend = p2-1 - linebuf; + p1 = genbuf; + p2 = holdsp; + while(*p2++ = *p1++, p1 <= &genbuf[genend]); + hspend = p2-1 - holdsp; + break; + + case YCOM: + if (multibyte) { + struct yitem **yt, *yp; + + yt = (struct yitem **)ipc->bptr.re1; + k1 = 0; /*linebuf*/ + k2 = 0; /*genbuf*/ + do { + k3 = k1; + lp = &linebuf[k1]; + c = fetch(&lp); + k1 = lp - linebuf; + if (invchar == 0 && + (yp = ylook(c, yt, 0)) != NULL) { + k3 = 0; /*yp->y_mc*/ + do { + if (k2 >= gbend) + growsp("output " + "line too " + "long."); + genbuf[k2] = + yp->y_mc[k3++]; + } while (genbuf[k2++] != '\0'); + k2--; + } else { + while (k3 < k1) { + if (k2 >= gbend) + growsp("output " + "line too " + "long."); + genbuf[k2++] = + linebuf[k3++]; + } + } + } while (k1 <= spend); + genend = k2-1; + p1 = linebuf; + p2 = genbuf; + while (*p1++ = *p2++, p2 <= &genbuf[genend]); + spend = p1-1 - linebuf; + } else { + p1 = linebuf; + p2 = ipc->bptr.re1; + while((*p1 = p2[*p1 & 0377]) != 0) p1++; + } + break; + case COCOM: + case ECOM: + case FCOM: + case CWCOM: + ; + } + +} + +static int +gline(int addr) +{ + register char *p2; + register int c; + register int c1; + c1 = addr; + p2 = cbp; + for (;;) { + if (p2 >= ebp) { + if (f < 0 || (c = read(f, ibuf, sizeof ibuf)) <= 0) { + if (c1 > addr && dolflag == 0) { + c = 1; + ibuf[0] = '\n'; + close(f); + f = -1; + } else + return(-1); + } else + ibrd += c; + p2 = ibuf; + ebp = ibuf+c; + } + if ((c = *p2++ & 0377) == '\n') { + ibrd--; + if(needdol && p2 >= ebp) { + if(f<0||(c = read(f, ibuf, sizeof ibuf)) <= 0) { + close(f); + f = -1; + if(eargc == 0) + dolflag = 1; + } else + 
ibrd += c; + + p2 = ibuf; + ebp = ibuf + c; + } + break; + } + if(c1 >= lbend) + growsp(NULL); + linebuf[c1++] = (char)c; + ibrd--; + } + lnum++; + if(c1 >= lbend) + growsp(NULL); + linebuf[c1] = 0; + cbp = p2; + + sflag = 0; + return(c1); +} + +static void +arout(void) +{ + register char *p1; + struct reptr **a; + FILE *fi; + char c; + int t; + + for (a = abuf; *a; a++) { + if((*a)->command == ACOM) { + for(p1 = (*a)->bptr.re1; *p1; ) { + putc(*p1&0377, stdout); + p1++; + } + putc('\n', stdout); + } else { + if((fi = fopen((*a)->bptr.re1, "r")) == NULL) + continue; + while((t = getc(fi)) != EOF) { + c = t; + putc(c&0377, stdout); + } + fclose(fi); + } + } + aptr = 1; + *A(aptr) = 0; +} + +static void +lcom(wint_t c, int valid) +{ + if (!valid) { + oout(c); + return; + } +#if defined (SUS) || defined (SU3) + switch (c) { + case '\\': + mout("\\\\"); + return; + case '\a': + mout("\\a"); + return; + case '\b': + mout("\\b"); + return; + case '\f': + mout("\\f"); + return; + case '\r': + mout("\\r"); + return; + case '\t': + mout("\\t"); + return; + case '\v': + mout("\\v"); + return; + } +#else /* !SUS, !SU3 */ + if (c < 040) { + mout(trans[c]); + return; + } +#endif /* !SUS, !SU3 */ + if (multibyte) { + if (iswprint(c)) + wout(c); + else + nout(c); + } else { + if (isprint(c)) + lout(c); + else + oout(c); + } +} + +static void +oout(int c) +{ + char lbuf[5], *p; + int d; + const char *nums = "01234567"; + + p = lbuf; + *p++ = '\\'; + *p++ = nums[(c & ~077) >> 6]; + c &= 077; + d = c & 07; + *p++ = c > d ? 
nums[(c-d)>>3] : nums[0]; + *p++ = nums[d]; + *p = '\0'; + mout(lbuf); +} + +static void +mout(const char *p) +{ + while (*p != '\0') { + lout(*p & 0377); + p++; + } +} + +static void +nout(wint_t c) +{ + char mb[MB_LEN_MAX+1]; + char *p; + int i; + + if ((i = wctomb(mb, c)) > 0) { + mb[i] = '\0'; + for (p = mb; *p; p++) + oout(*p & 0377); + } +} + +static void +lout(int c) +{ + if (lcomlen++ > 70) { + putc('\\', stdout); + putc('\n', stdout); + lcomlen = 1; + } + putc(c, stdout); +} + +static void +wout(wint_t c) +{ + char mb[MB_LEN_MAX+1], *p; + int i, w; + + if ((i = wctomb(mb, c)) > 0) { + w = wcwidth(c); + if (lcomlen + w > 70) { + putc('\\', stdout); + putc('\n', stdout); + lcomlen = 0; + } + mb[i] = '\0'; + for (p = mb; *p; p++) + putc(*p & 0377, stdout); + lcomlen += w; + } +} diff --git a/sed/version.c b/sed/version.c @@ -0,0 +1,22 @@ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +#if defined (SU3) +static const char sccsid[] USED = "@(#)sed_su3.sl 2.34 (gritter) 6/26/05"; +#elif defined (SUS) +static const char sccsid[] USED = "@(#)sed_sus.sl 2.34 (gritter) 6/26/05"; +#elif defined (S42) +static const char sccsid[] USED = "@(#)sed_s42.sl 2.34 (gritter) 6/26/05"; +#else /* !SUS, !SU3, !S42 */ +static const char sccsid[] USED = "@(#)sed.sl 2.34 (gritter) 6/26/05"; +#endif /* !SUS, !SU3, !S42 */ +/* SLIST */ +/* +sed.h: Sccsid @(#)sed.h 1.32 (gritter) 2/6/05 +sed0.c: Sccsid @(#)sed0.c 1.64 (gritter) 3/12/05> +sed1.c: Sccsid @(#)sed1.c 1.42 (gritter) 2/6/05> +*/