fatbase

portable OpenBSD tools
git clone git://git.2f30.org/fatbase
Log | Files | Refs

commit 2a4b1b0de4ebb3063026f4dff88419c802964736
Author: sin <sin@2f30.org>
Date:   Tue, 18 Nov 2014 15:44:25 +0000

Initial commit

Diffstat:
AMakefile | 16++++++++++++++++
Aawk/Makefile | 22++++++++++++++++++++++
Aawk/arc4random.c | 12++++++++++++
Aawk/awk.1 | 800+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aawk/awk.h | 240+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aawk/awkgram.y | 487+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aawk/b.c | 958+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aawk/lex.c | 597+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aawk/lib.c | 739+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aawk/main.c | 212+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aawk/maketab.c | 172+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aawk/parse.c | 277+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aawk/proto.h | 196+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aawk/reallocarray.c | 38++++++++++++++++++++++++++++++++++++++
Aawk/run.c | 2027+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aawk/strlcat.c | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
Aawk/strlcpy.c | 48++++++++++++++++++++++++++++++++++++++++++++++++
Aawk/tran.c | 458+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aawk/util.h | 8++++++++
Ased/Makefile | 10++++++++++
Ased/POSIX | 199+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/TEST/hanoi.sed | 103+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/TEST/math.sed | 164+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/TEST/sed.test | 549+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/TODO | 1+
Ased/compile.c | 861+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/defs.h | 148+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/extern.h | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/fgetln.c | 69+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/main.c | 358+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/misc.c | 124+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/process.c | 625+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/reallocarray.c | 38++++++++++++++++++++++++++++++++++++++
Ased/sed.1 | 566+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/strlcat.c | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/strlcpy.c | 48++++++++++++++++++++++++++++++++++++++++++++++++
Ased/util.h | 8++++++++
37 files changed, 11339 insertions(+), 0 deletions(-)

diff --git a/Makefile b/Makefile @@ -0,0 +1,16 @@ +TARG = awk sed + +all: + @echo "CFLAGS = $(CFLAGS)" + @echo "LDFLAGS = $(LDFLAGS)" + @echo "CC = $(CC)" + for i in $(TARG); do cd $$i && $(MAKE) || exit; cd ..; done + +install: + for i in $(TARG); do cd $$i && $(MAKE) install || exit; cd ..; done + +uninstall: + for i in $(TARG); do cd $$i && $(MAKE) uninstall || exit; cd ..; done + +clean: + for i in $(TARG); do cd $$i && $(MAKE) clean || exit; cd ..; done diff --git a/awk/Makefile b/awk/Makefile @@ -0,0 +1,22 @@ +OBJ = arc4random.o b.o lex.o lib.o main.o parse.o proctab.o reallocarray.o run.o strlcat.o strlcpy.o tran.o ytab.o +BIN = awk +LDLIBS = -lm + +all: ytab.c proctab.c $(BIN) + +ytab.c ytab.h: awkgram.y + $(YACC) -d awkgram.y + mv y.tab.c ytab.c + mv y.tab.h ytab.h + +proctab.c: maketab + ./maketab > proctab.c + +maketab: ytab.h maketab.c + $(CC) $(CFLAGS) maketab.c -o $@ + +$(BIN): $(OBJ) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ) $(LDLIBS) + +clean: + rm -f $(BIN) $(OBJ) maketab proctab.c ytab.[ch] diff --git a/awk/arc4random.c b/awk/arc4random.c @@ -0,0 +1,12 @@ +#include <sys/types.h> +#include <stdint.h> +#include <stdlib.h> +#include <time.h> +#include <unistd.h> + +uint32_t +arc4random(void) +{ + srand(time(NULL) ^ getpid() ^ 0x42); + return rand(); +} diff --git a/awk/awk.1 b/awk/awk.1 @@ -0,0 +1,800 @@ +.\" $OpenBSD: awk.1,v 1.41 2014/03/17 21:48:51 jmc Exp $ +.\" +.\" Copyright (C) Lucent Technologies 1997 +.\" All Rights Reserved +.\" +.\" Permission to use, copy, modify, and distribute this software and +.\" its documentation for any purpose and without fee is hereby +.\" granted, provided that the above copyright notice appear in all +.\" copies and that both that the copyright notice and this +.\" permission notice and warranty disclaimer appear in supporting +.\" documentation, and that the name Lucent Technologies or any of +.\" its entities not be used in advertising or publicity pertaining +.\" to distribution of the software without specific, written prior +.\" permission. +.\" +.\" LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +.\" INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. +.\" IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +.\" SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +.\" IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +.\" ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +.\" THIS SOFTWARE. +.\" +.Dd $Mdocdate: March 17 2014 $ +.Dt AWK 1 +.Os +.Sh NAME +.Nm awk +.Nd pattern-directed scanning and processing language +.Sh SYNOPSIS +.Nm awk +.Op Fl safe +.Op Fl V +.Op Fl d Ns Op Ar n +.Op Fl F Ar fs +.Op Fl v Ar var Ns = Ns Ar value +.Op Ar prog | Fl f Ar progfile +.Ar +.Sh DESCRIPTION +.Nm +scans each input +.Ar file +for lines that match any of a set of patterns specified literally in +.Ar prog +or in one or more files specified as +.Fl f Ar progfile . +With each pattern there can be an associated action that will be performed +when a line of a +.Ar file +matches the pattern. +Each line is matched against the +pattern portion of every pattern-action statement; +the associated action is performed for each matched pattern. +The file name +.Sq - +means the standard input. +Any +.Ar file +of the form +.Ar var Ns = Ns Ar value +is treated as an assignment, not a filename, +and is executed at the time it would have been opened if it were a filename. +.Pp +The options are as follows: +.Bl -tag -width "-safe " +.It Fl d Ns Op Ar n +Debug mode. +Set debug level to +.Ar n , +or 1 if +.Ar n +is not specified. +A value greater than 1 causes +.Nm +to dump core on fatal errors. +.It Fl F Ar fs +Define the input field separator to be the regular expression +.Ar fs . +.It Fl f Ar progfile +Read program code from the specified file +.Ar progfile +instead of from the command line. +.It Fl safe +Disable file output +.Pf ( Ic print No > , +.Ic print No >> ) , +process creation +.Po +.Ar cmd | Ic getline , +.Ic print | , +.Ic system +.Pc +and access to the environment +.Pf ( Va ENVIRON ; +see the section on variables below). +This is a first +.Pq and not very reliable +approximation to a +.Dq safe +version of +.Nm . +.It Fl V +Print the version number of +.Nm +to standard output and exit. +.It Fl v Ar var Ns = Ns Ar value +Assign +.Ar value +to variable +.Ar var +before +.Ar prog +is executed; +any number of +.Fl v +options may be present. +.El +.Pp +The input is normally made up of input lines +.Pq records +separated by newlines, or by the value of +.Va RS . +If +.Va RS +is null, then any number of blank lines are used as the record separator, +and newlines are used as field separators +(in addition to the value of +.Va FS ) . +This is convenient when working with multi-line records. +.Pp +An input line is normally made up of fields separated by whitespace, +or by the regular expression +.Va FS . +The fields are denoted +.Va $1 , $2 , ... , +while +.Va $0 +refers to the entire line. +If +.Va FS +is null, the input line is split into one field per character. +.Pp +Normally, any number of blanks separate fields. +In order to set the field separator to a single blank, use the +.Fl F +option with a value of +.Sq [\ \&] . +If a field separator of +.Sq t +is specified, +.Nm +treats it as if +.Sq \et +had been specified and uses +.Aq TAB +as the field separator. +In order to use a literal +.Sq t +as the field separator, use the +.Fl F +option with a value of +.Sq [t] . +.Pp +A pattern-action statement has the form +.Pp +.D1 Ar pattern Ic \&{ Ar action Ic \&} +.Pp +A missing +.Ic \&{ Ar action Ic \&} +means print the line; +a missing pattern always matches. +Pattern-action statements are separated by newlines or semicolons. +.Pp +Newlines are permitted after a terminating statement or following a comma +.Pq Sq ,\& , +an open brace +.Pq Sq { , +a logical AND +.Pq Sq && , +a logical OR +.Pq Sq || , +after the +.Sq do +or +.Sq else +keywords, +or after the closing parenthesis of an +.Sq if , +.Sq for , +or +.Sq while +statement. +Additionally, a backslash +.Pq Sq \e +can be used to escape a newline between tokens. +.Pp +An action is a sequence of statements. +A statement can be one of the following: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It Xo Ic if ( Ar expression ) Ar statement +.Op Ic else Ar statement +.Xc +.It Ic while ( Ar expression ) Ar statement +.It Xo Ic for +.No ( Ar expression ; expression ; expression ) statement +.Xc +.It Xo Ic for +.No ( Ar var Ic in Ar array ) statement +.Xc +.It Xo Ic do +.Ar statement Ic while ( Ar expression ) +.Xc +.It Ic break +.It Ic continue +.It Xo Ic { +.Op Ar statement ... +.Ic } +.Xc +.It Xo Ar expression +.No # commonly +.Ar var No = Ar expression +.Xc +.It Xo Ic print +.Op Ar expression-list +.Op > Ns Ar expression +.Xc +.It Xo Ic printf Ar format +.Op Ar ... , expression-list +.Op > Ns Ar expression +.Xc +.It Ic return Op Ar expression +.It Xo Ic next +.No # skip remaining patterns on this input line +.Xc +.It Xo Ic nextfile +.No # skip rest of this file, open next, start at top +.Xc +.It Xo Ic delete +.Sm off +.Ar array Ic \&[ Ar expression Ic \&] +.Sm on +.No # delete an array element +.Xc +.It Xo Ic delete Ar array +.No # delete all elements of array +.Xc +.It Xo Ic exit +.Op Ar expression +.No # exit immediately; status is Ar expression +.Xc +.El +.Pp +Statements are terminated by +semicolons, newlines or right braces. +An empty +.Ar expression-list +stands for +.Ar $0 . +String constants are quoted +.Li \&"" , +with the usual C escapes recognized within +(see +.Xr printf 1 +for a complete list of these). +Expressions take on string or numeric values as appropriate, +and are built using the operators +.Ic + \- * / % ^ +.Pq exponentiation , +and concatenation +.Pq indicated by whitespace . +The operators +.Ic \&! ++ \-\- += \-= *= /= %= ^= +.Ic > >= < <= == != ?: +are also available in expressions. +Variables may be scalars, array elements +(denoted +.Li x[i] ) +or fields. +Variables are initialized to the null string. +Array subscripts may be any string, +not necessarily numeric; +this allows for a form of associative memory. +Multiple subscripts such as +.Li [i,j,k] +are permitted; the constituents are concatenated, +separated by the value of +.Va SUBSEP +.Pq see the section on variables below . +.Pp +The +.Ic print +statement prints its arguments on the standard output +(or on a file if +.Pf > Ns Ar file +or +.Pf >> Ns Ar file +is present or on a pipe if +.Pf |\ \& Ar cmd +is present), separated by the current output field separator, +and terminated by the output record separator. +.Ar file +and +.Ar cmd +may be literal names or parenthesized expressions; +identical string values in different statements denote +the same open file. +The +.Ic printf +statement formats its expression list according to the format +(see +.Xr printf 1 ) . +.Pp +Patterns are arbitrary Boolean combinations +(with +.Ic "\&! || &&" ) +of regular expressions and +relational expressions. +.Nm +supports extended regular expressions +.Pq EREs . +See +.Xr re_format 7 +for more information on regular expressions. +Isolated regular expressions +in a pattern apply to the entire line. +Regular expressions may also occur in +relational expressions, using the operators +.Ic ~ +and +.Ic !~ . +.Pf / Ns Ar re Ns / +is a constant regular expression; +any string (constant or variable) may be used +as a regular expression, except in the position of an isolated regular expression +in a pattern. +.Pp +A pattern may consist of two patterns separated by a comma; +in this case, the action is performed for all lines +from an occurrence of the first pattern +through an occurrence of the second. +.Pp +A relational expression is one of the following: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It Ar expression matchop regular-expression +.It Ar expression relop expression +.It Ar expression Ic in Ar array-name +.It Xo Ic \&( Ns +.Ar expr , expr , \&... Ns Ic \&) in +.Ar array-name +.Xc +.El +.Pp +where a +.Ar relop +is any of the six relational operators in C, and a +.Ar matchop +is either +.Ic ~ +(matches) +or +.Ic !~ +(does not match). +A conditional is an arithmetic expression, +a relational expression, +or a Boolean combination +of these. +.Pp +The special patterns +.Ic BEGIN +and +.Ic END +may be used to capture control before the first input line is read +and after the last. +.Ic BEGIN +and +.Ic END +do not combine with other patterns. +.Pp +Variable names with special meanings: +.Pp +.Bl -tag -width "FILENAME " -compact +.It Va ARGC +Argument count, assignable. +.It Va ARGV +Argument array, assignable; +non-null members are taken as filenames. +.It Va CONVFMT +Conversion format when converting numbers +(default +.Qq Li %.6g ) . +.It Va ENVIRON +Array of environment variables; subscripts are names. +.It Va FILENAME +The name of the current input file. +.It Va FNR +Ordinal number of the current record in the current file. +.It Va FS +Regular expression used to separate fields; also settable +by option +.Fl F Ar fs . +.It Va NF +Number of fields in the current record. +.Va $NF +can be used to obtain the value of the last field in the current record. +.It Va NR +Ordinal number of the current record. +.It Va OFMT +Output format for numbers (default +.Qq Li %.6g ) . +.It Va OFS +Output field separator (default blank). +.It Va ORS +Output record separator (default newline). +.It Va RLENGTH +The length of the string matched by the +.Fn match +function. +.It Va RS +Input record separator (default newline). +.It Va RSTART +The starting position of the string matched by the +.Fn match +function. +.It Va SUBSEP +Separates multiple subscripts (default 034). +.El +.Sh FUNCTIONS +The awk language has a variety of built-in functions: +arithmetic, string, input/output, general, and bit-operation. +.Pp +Functions may be defined (at the position of a pattern-action statement) +thusly: +.Pp +.Dl function foo(a, b, c) { ...; return x } +.Pp +Parameters are passed by value if scalar, and by reference if array name; +functions may be called recursively. +Parameters are local to the function; all other variables are global. +Thus local variables may be created by providing excess parameters in +the function definition. +.Ss Arithmetic Functions +.Bl -tag -width "atan2(y, x)" +.It Fn atan2 y x +Return the arctangent of +.Fa y Ns / Ns Fa x +in radians. +.It Fn cos x +Return the cosine of +.Fa x , +where +.Fa x +is in radians. +.It Fn exp x +Return the exponential of +.Fa x . +.It Fn int x +Return +.Fa x +truncated to an integer value. +.It Fn log x +Return the natural logarithm of +.Fa x . +.It Fn rand +Return a random number, +.Fa n , +such that +.Sm off +.Pf 0 \*(Le Fa n No \*(Lt 1 . +.Sm on +.It Fn sin x +Return the sine of +.Fa x , +where +.Fa x +is in radians. +.It Fn sqrt x +Return the square root of +.Fa x . +.It Fn srand expr +Sets seed for +.Fn rand +to +.Fa expr +and returns the previous seed. +If +.Fa expr +is omitted, the time of day is used instead. +.El +.Ss String Functions +.Bl -tag -width "split(s, a, fs)" +.It Fn gsub r t s +The same as +.Fn sub +except that all occurrences of the regular expression are replaced. +.Fn gsub +returns the number of replacements. +.It Fn index s t +The position in +.Fa s +where the string +.Fa t +occurs, or 0 if it does not. +.It Fn length s +The length of +.Fa s +taken as a string, +or of +.Va $0 +if no argument is given. +.It Fn match s r +The position in +.Fa s +where the regular expression +.Fa r +occurs, or 0 if it does not. +The variable +.Va RSTART +is set to the starting position of the matched string +.Pq which is the same as the returned value +or zero if no match is found. +The variable +.Va RLENGTH +is set to the length of the matched string, +or \-1 if no match is found. +.It Fn split s a fs +Splits the string +.Fa s +into array elements +.Va a[1] , a[2] , ... , a[n] +and returns +.Va n . +The separation is done with the regular expression +.Ar fs +or with the field separator +.Va FS +if +.Ar fs +is not given. +An empty string as field separator splits the string +into one array element per character. +.It Fn sprintf fmt expr ... +The string resulting from formatting +.Fa expr , ... +according to the +.Xr printf 1 +format +.Fa fmt . +.It Fn sub r t s +Substitutes +.Fa t +for the first occurrence of the regular expression +.Fa r +in the string +.Fa s . +If +.Fa s +is not given, +.Va $0 +is used. +An ampersand +.Pq Sq & +in +.Fa t +is replaced in string +.Fa s +with regular expression +.Fa r . +A literal ampersand can be specified by preceding it with two backslashes +.Pq Sq \e\e . +A literal backslash can be specified by preceding it with another backslash +.Pq Sq \e\e . +.Fn sub +returns the number of replacements. +.It Fn substr s m n +Return at most the +.Fa n Ns -character +substring of +.Fa s +that begins at position +.Fa m +counted from 1. +If +.Fa n +is omitted, or if +.Fa n +specifies more characters than are left in the string, +the length of the substring is limited by the length of +.Fa s . +.It Fn tolower str +Returns a copy of +.Fa str +with all upper-case characters translated to their +corresponding lower-case equivalents. +.It Fn toupper str +Returns a copy of +.Fa str +with all lower-case characters translated to their +corresponding upper-case equivalents. +.El +.Ss Input/Output and General Functions +.Bl -tag -width "getline [var] < file" +.It Fn close expr +Closes the file or pipe +.Fa expr . +.Fa expr +should match the string that was used to open the file or pipe. +.It Ar cmd | Ic getline Op Va var +Read a record of input from a stream piped from the output of +.Ar cmd . +If +.Va var +is omitted, the variables +.Va $0 +and +.Va NF +are set. +Otherwise +.Va var +is set. +If the stream is not open, it is opened. +As long as the stream remains open, subsequent calls +will read subsequent records from the stream. +The stream remains open until explicitly closed with a call to +.Fn close . +.Ic getline +returns 1 for a successful input, 0 for end of file, and \-1 for an error. +.It Fn fflush [expr] +Flushes any buffered output for the file or pipe +.Fa expr , +or all open files or pipes if +.Fa expr +is omitted. +.Fa expr +should match the string that was used to open the file or pipe. +.It Ic getline +Sets +.Va $0 +to the next input record from the current input file. +This form of +.Ic getline +sets the variables +.Va NF , +.Va NR , +and +.Va FNR . +.Ic getline +returns 1 for a successful input, 0 for end of file, and \-1 for an error. +.It Ic getline Va var +Sets +.Va $0 +to variable +.Va var . +This form of +.Ic getline +sets the variables +.Va NR +and +.Va FNR . +.Ic getline +returns 1 for a successful input, 0 for end of file, and \-1 for an error. +.It Xo +.Ic getline Op Va var +.Pf \ \&< Ar file +.Xc +Sets +.Va $0 +to the next record from +.Ar file . +If +.Va var +is omitted, the variables +.Va $0 +and +.Va NF +are set. +Otherwise +.Va var +is set. +If +.Ar file +is not open, it is opened. +As long as the stream remains open, subsequent calls will read subsequent +records from +.Ar file . +.Ar file +remains open until explicitly closed with a call to +.Fn close . +.It Fn system cmd +Executes +.Fa cmd +and returns its exit status. +.El +.Ss Bit-Operation Functions +.Bl -tag -width "lshift(a, b)" +.It Fn compl x +Returns the bitwise complement of integer argument x. +.It Fn and x y +Performs a bitwise AND on integer arguments x and y. +.It Fn or x y +Performs a bitwise OR on integer arguments x and y. +.It Fn xor x y +Performs a bitwise Exclusive-OR on integer arguments x and y. +.It Fn lshift x n +Returns integer argument x shifted by n bits to the left. +.It Fn rshift x n +Returns integer argument x shifted by n bits to the right. +.El +.Sh EXIT STATUS +.Ex -std awk +.Pp +But note that the +.Ic exit +expression can modify the exit status. +.Sh EXAMPLES +Print lines longer than 72 characters: +.Pp +.Dl length($0) > 72 +.Pp +Print first two fields in opposite order: +.Pp +.Dl { print $2, $1 } +.Pp +Same, with input fields separated by comma and/or blanks and tabs: +.Bd -literal -offset indent +BEGIN { FS = ",[ \et]*|[ \et]+" } + { print $2, $1 } +.Ed +.Pp +Add up first column, print sum and average: +.Bd -literal -offset indent +{ s += $1 } +END { print "sum is", s, " average is", s/NR } +.Ed +.Pp +Print all lines between start/stop pairs: +.Pp +.Dl /start/, /stop/ +.Pp +Simulate echo(1): +.Bd -literal -offset indent +BEGIN { # Simulate echo(1) + for (i = 1; i < ARGC; i++) printf "%s ", ARGV[i] + printf "\en" + exit } +.Ed +.Pp +Print an error message to standard error: +.Bd -literal -offset indent +{ print "error!" > "/dev/stderr" } +.Ed +.Sh SEE ALSO +.Xr lex 1 , +.Xr printf 1 , +.Xr sed 1 , +.Xr re_format 7 , +.Xr script 7 +.Rs +.%A A. V. Aho +.%A B. W. Kernighan +.%A P. J. Weinberger +.%T The AWK Programming Language +.%I Addison-Wesley +.%D 1988 +.%O ISBN 0-201-07981-X +.Re +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification, +except +.Nm +does not support {n,m} pattern matching. +.Pp +The flags +.Op Fl \&dV +and +.Op Fl safe , +as well as the commands +.Cm fflush , compl , and , or , +.Cm xor , lshift , rshift , +are extensions to that specification. +.Sh HISTORY +An +.Nm +utility appeared in +.At v7 . +.Sh BUGS +There are no explicit conversions between numbers and strings. +To force an expression to be treated as a number add 0 to it; +to force it to be treated as a string concatenate +.Li \&"" +to it. +.Pp +The scope rules for variables in functions are a botch; +the syntax is worse. diff --git a/awk/awk.h b/awk/awk.h @@ -0,0 +1,240 @@ +/* $OpenBSD: awk.h,v 1.13 2008/10/06 20:38:33 millert Exp $ */ +/**************************************************************** +Copyright (C) Lucent Technologies 1997 +All Rights Reserved + +Permission to use, copy, modify, and distribute this software and +its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the name Lucent Technologies or any of +its entities not be used in advertising or publicity pertaining +to distribution of the software without specific, written prior +permission. + +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +****************************************************************/ + +#include <assert.h> + +typedef double Awkfloat; + +/* unsigned char is more trouble than it's worth */ + +typedef unsigned char uschar; + +#define xfree(a) { if ((a) != NULL) { free((void *) (a)); (a) = NULL; } } + +#define NN(p) ((p) ? (p) : "(null)") /* guaranteed non-null for dprintf +*/ +#define DEBUG +#ifdef DEBUG + /* uses have to be doubly parenthesized */ +# define dprintf(x) if (dbg) printf x +#else +# define dprintf(x) +#endif + +extern int compile_time; /* 1 if compiling, 0 if running */ +extern int safe; /* 0 => unsafe, 1 => safe */ + +#define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */ +extern int recsize; /* size of current record, orig RECSIZE */ + +extern char **FS; +extern char **RS; +extern char **ORS; +extern char **OFS; +extern char **OFMT; +extern Awkfloat *NR; +extern Awkfloat *FNR; +extern Awkfloat *NF; +extern char **FILENAME; +extern char **SUBSEP; +extern Awkfloat *RSTART; +extern Awkfloat *RLENGTH; + +extern char *record; /* points to $0 */ +extern int lineno; /* line number in awk program */ +extern int errorflag; /* 1 if error has occurred */ +extern int donefld; /* 1 if record broken into fields */ +extern int donerec; /* 1 if record is valid (no fld has changed */ +extern char inputFS[]; /* FS at time of input, for field splitting */ + +extern int dbg; + +extern char *patbeg; /* beginning of pattern matched */ +extern int patlen; /* length of pattern matched. set in b.c */ + +/* Cell: all information about a variable or constant */ + +typedef struct Cell { + uschar ctype; /* OCELL, OBOOL, OJUMP, etc. */ + uschar csub; /* CCON, CTEMP, CFLD, etc. */ + char *nval; /* name, for variables only */ + char *sval; /* string value */ + Awkfloat fval; /* value as number */ + int tval; /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE */ + struct Cell *cnext; /* ptr to next if chained */ +} Cell; + +typedef struct Array { /* symbol table array */ + int nelem; /* elements in table right now */ + int size; /* size of tab */ + Cell **tab; /* hash table pointers */ +} Array; + +#define NSYMTAB 50 /* initial size of a symbol table */ +extern Array *symtab; + +extern Cell *nrloc; /* NR */ +extern Cell *fnrloc; /* FNR */ +extern Cell *nfloc; /* NF */ +extern Cell *rstartloc; /* RSTART */ +extern Cell *rlengthloc; /* RLENGTH */ + +/* Cell.tval values: */ +#define NUM 01 /* number value is valid */ +#define STR 02 /* string value is valid */ +#define DONTFREE 04 /* string space is not freeable */ +#define CON 010 /* this is a constant */ +#define ARR 020 /* this is an array */ +#define FCN 040 /* this is a function name */ +#define FLD 0100 /* this is a field $1, $2, ... */ +#define REC 0200 /* this is $0 */ + + +/* function types */ +#define FLENGTH 1 +#define FSQRT 2 +#define FEXP 3 +#define FLOG 4 +#define FINT 5 +#define FSYSTEM 6 +#define FRAND 7 +#define FSRAND 8 +#define FSIN 9 +#define FCOS 10 +#define FATAN 11 +#define FTOUPPER 12 +#define FTOLOWER 13 +#define FFLUSH 14 +#define FAND 15 +#define FFOR 16 +#define FXOR 17 +#define FCOMPL 18 +#define FLSHIFT 19 +#define FRSHIFT 20 + +/* Node: parse tree is made of nodes, with Cell's at bottom */ + +typedef struct Node { + int ntype; + struct Node *nnext; + int lineno; + int nobj; + struct Node *narg[1]; /* variable: actual size set by calling malloc */ +} Node; + +#define NIL ((Node *) 0) + +extern Node *winner; +extern Node *nullstat; +extern Node *nullnode; + +/* ctypes */ +#define OCELL 1 +#define OBOOL 2 +#define OJUMP 3 + +/* Cell subtypes: csub */ +#define CFREE 7 +#define CCOPY 6 +#define CCON 5 +#define CTEMP 4 +#define CNAME 3 +#define CVAR 2 +#define CFLD 1 +#define CUNK 0 + +/* bool subtypes */ +#define BTRUE 11 +#define BFALSE 12 + +/* jump subtypes */ +#define JEXIT 21 +#define JNEXT 22 +#define JBREAK 23 +#define JCONT 24 +#define JRET 25 +#define JNEXTFILE 26 + +/* node types */ +#define NVALUE 1 +#define NSTAT 2 +#define NEXPR 3 + + +extern int pairstack[], paircnt; + +#define notlegal(n) (n <= FIRSTTOKEN || n >= LASTTOKEN || proctab[n-FIRSTTOKEN] == nullproc) +#define isvalue(n) ((n)->ntype == NVALUE) +#define isexpr(n) ((n)->ntype == NEXPR) +#define isjump(n) ((n)->ctype == OJUMP) +#define isexit(n) ((n)->csub == JEXIT) +#define isbreak(n) ((n)->csub == JBREAK) +#define iscont(n) ((n)->csub == JCONT) +#define isnext(n) ((n)->csub == JNEXT || (n)->csub == JNEXTFILE) +#define isret(n) ((n)->csub == JRET) +#define isrec(n) ((n)->tval & REC) +#define isfld(n) ((n)->tval & FLD) +#define isstr(n) ((n)->tval & STR) +#define isnum(n) ((n)->tval & NUM) +#define isarr(n) ((n)->tval & ARR) +#define isfcn(n) ((n)->tval & FCN) +#define istrue(n) ((n)->csub == BTRUE) +#define istemp(n) ((n)->csub == CTEMP) +#define isargument(n) ((n)->nobj == ARG) +/* #define freeable(p) (!((p)->tval & DONTFREE)) */ +#define freeable(p) ( ((p)->tval & (STR|DONTFREE)) == STR ) + +/* structures used by regular expression matching machinery, mostly b.c: */ + +#define NCHARS (256+3) /* 256 handles 8-bit chars; 128 does 7-bit */ + /* watch out in match(), etc. */ +#define NSTATES 32 + +typedef struct rrow { + long ltype; /* long avoids pointer warnings on 64-bit */ + union { + int i; + Node *np; + uschar *up; + } lval; /* because Al stores a pointer in it! */ + int *lfollow; +} rrow; + +typedef struct fa { + uschar gototab[NSTATES][NCHARS]; + uschar out[NSTATES]; + uschar *restr; + int *posns[NSTATES]; + int anchor; + int use; + int initstat; + int curstat; + int accept; + int reset; + struct rrow re[1]; /* variable: actual size set by calling malloc */ +} fa; + + +#include "proto.h" diff --git a/awk/awkgram.y b/awk/awkgram.y @@ -0,0 +1,487 @@ +/* $OpenBSD: awkgram.y,v 1.9 2011/09/28 19:27:18 millert Exp $ */ +/**************************************************************** +Copyright (C) Lucent Technologies 1997 +All Rights Reserved + +Permission to use, copy, modify, and distribute this software and +its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the name Lucent Technologies or any of +its entities not be used in advertising or publicity pertaining +to distribution of the software without specific, written prior +permission. + +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +****************************************************************/ + +%{ +#include <stdio.h> +#include <string.h> +#include "awk.h" + +void checkdup(Node *list, Cell *item); +int yywrap(void) { return(1); } + +Node *beginloc = 0; +Node *endloc = 0; +int infunc = 0; /* = 1 if in arglist or body of func */ +int inloop = 0; /* = 1 if in while, for, do */ +char *curfname = 0; /* current function name */ +Node *arglist = 0; /* list of args for current function */ +%} + +%union { + Node *p; + Cell *cp; + int i; + char *s; +} + +%token <i> FIRSTTOKEN /* must be first */ +%token <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND +%token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']' +%token <i> ARRAY +%token <i> MATCH NOTMATCH MATCHOP +%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE +%token <i> AND BOR APPEND EQ GE GT LE LT NE IN +%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC +%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE +%token <i> ADD MINUS MULT DIVIDE MOD +%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ +%token <i> PRINT PRINTF SPRINTF +%token <p> ELSE INTEST CONDEXPR +%token <i> POSTINCR PREINCR POSTDECR PREDECR +%token <cp> VAR IVAR VARNF CALL NUMBER STRING +%token <s> REGEXPR + +%type <p> pas pattern ppattern plist pplist patlist prarg term re +%type <p> pa_pat pa_stat pa_stats +%type <s> reg_expr +%type <p> simple_stmt opt_simple_stmt stmt stmtlist +%type <p> var varname funcname varlist +%type <p> for if else while +%type <i> do st +%type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor +%type <i> subop print + +%right ASGNOP +%right '?' +%right ':' +%left BOR +%left AND +%left GETLINE +%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|' +%left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC +%left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER +%left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR +%left REGEXPR VAR VARNF IVAR WHILE '(' +%left CAT +%left '+' '-' +%left '*' '/' '%' +%left NOT UMINUS +%right POWER +%right DECR INCR +%left INDIRECT +%token LASTTOKEN /* must be last */ + +%% + +program: + pas { if (errorflag==0) + winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); } + | error { yyclearin; bracecheck(); SYNTAX("bailing out"); } + ; + +and: + AND | and NL + ; + +bor: + BOR | bor NL + ; + +comma: + ',' | comma NL + ; + +do: + DO | do NL + ; + +else: + ELSE | else NL + ; + +for: + FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt + { --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); } + | FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt + { --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); } + | FOR '(' varname IN varname rparen {inloop++;} stmt + { --inloop; $$ = stat3(IN, $3, makearr($5), $8); } + ; + +funcname: + VAR { setfname($1); } + | CALL { setfname($1); } + ; + +if: + IF '(' pattern rparen { $$ = notnull($3); } + ; + +lbrace: + '{' | lbrace NL + ; + +nl: + NL | nl NL + ; + +opt_nl: + /* empty */ { $$ = 0; } + | nl + ; + +opt_pst: + /* empty */ { $$ = 0; } + | pst + ; + + +opt_simple_stmt: + /* empty */ { $$ = 0; } + | simple_stmt + ; + +pas: + opt_pst { $$ = 0; } + | opt_pst pa_stats opt_pst { $$ = $2; } + ; + +pa_pat: + pattern { $$ = notnull($1); } + ; + +pa_stat: + pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); } + | pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); } + | pa_pat ',' opt_nl pa_pat { $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); } + | pa_pat ',' opt_nl pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $4, $6); } + | lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); } + | XBEGIN lbrace stmtlist '}' + { beginloc = linkum(beginloc, $3); $$ = 0; } + | XEND lbrace stmtlist '}' + { endloc = linkum(endloc, $3); $$ = 0; } + | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}' + { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; } + ; + +pa_stats: + pa_stat + | pa_stats opt_pst pa_stat { $$ = linkum($1, $3); } + ; + +patlist: + pattern + | patlist comma pattern { $$ = linkum($1, $3); } + ; + +ppattern: + var ASGNOP ppattern { $$ = op2($2, $1, $3); } + | ppattern '?' ppattern ':' ppattern %prec '?' + { $$ = op3(CONDEXPR, notnull($1), $3, $5); } + | ppattern bor ppattern %prec BOR + { $$ = op2(BOR, notnull($1), notnull($3)); } + | ppattern and ppattern %prec AND + { $$ = op2(AND, notnull($1), notnull($3)); } + | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); } + | ppattern MATCHOP ppattern + { if (constnode($3)) + $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0)); + else + $$ = op3($2, (Node *)1, $1, $3); } + | ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); } + | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); } + | ppattern term %prec CAT { $$ = op2(CAT, $1, $2); } + | re + | term + ; + +pattern: + var ASGNOP pattern { $$ = op2($2, $1, $3); } + | pattern '?' pattern ':' pattern %prec '?' + { $$ = op3(CONDEXPR, notnull($1), $3, $5); } + | pattern bor pattern %prec BOR + { $$ = op2(BOR, notnull($1), notnull($3)); } + | pattern and pattern %prec AND + { $$ = op2(AND, notnull($1), notnull($3)); } + | pattern EQ pattern { $$ = op2($2, $1, $3); } + | pattern GE pattern { $$ = op2($2, $1, $3); } + | pattern GT pattern { $$ = op2($2, $1, $3); } + | pattern LE pattern { $$ = op2($2, $1, $3); } + | pattern LT pattern { $$ = op2($2, $1, $3); } + | pattern NE pattern { $$ = op2($2, $1, $3); } + | pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); } + | pattern MATCHOP pattern + { if (constnode($3)) + $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0)); + else + $$ = op3($2, (Node *)1, $1, $3); } + | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); } + | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); } + | pattern '|' GETLINE var { + if (safe) SYNTAX("cmd | getline is unsafe"); + else $$ = op3(GETLINE, $4, itonp($2), $1); } + | pattern '|' GETLINE { + if (safe) SYNTAX("cmd | getline is unsafe"); + else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); } + | pattern term %prec CAT { $$ = op2(CAT, $1, $2); } + | re + | term + ; + +plist: + pattern comma pattern { $$ = linkum($1, $3); } + | plist comma pattern { $$ = linkum($1, $3); } + ; + +pplist: + ppattern + | pplist comma ppattern { $$ = linkum($1, $3); } + ; + +prarg: + /* empty */ { $$ = rectonode(); } + | pplist + | '(' plist ')' { $$ = $2; } + ; + +print: + PRINT | PRINTF + ; + +pst: + NL | ';' | pst NL | pst ';' + ; + +rbrace: + '}' | rbrace NL + ; + +re: + reg_expr + { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); } + | NOT re { $$ = op1(NOT, notnull($2)); } + ; + +reg_expr: + '/' {startreg();} REGEXPR '/' { $$ = $3; } + ; + +rparen: + ')' | rparen NL + ; + +simple_stmt: + print prarg '|' term { + if (safe) SYNTAX("print | is unsafe"); + else $$ = stat3($1, $2, itonp($3), $4); } + | print prarg APPEND term { + if (safe) SYNTAX("print >> is unsafe"); + else $$ = stat3($1, $2, itonp($3), $4); } + | print prarg GT term { + if (safe) SYNTAX("print > is unsafe"); + else $$ = stat3($1, $2, itonp($3), $4); } + | print prarg { $$ = stat3($1, $2, NIL, NIL); } + | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); } + | DELETE varname { $$ = stat2(DELETE, makearr($2), 0); } + | pattern { $$ = exptostat($1); } + | error { yyclearin; SYNTAX("illegal statement"); } + ; + +st: + nl + | ';' opt_nl + ; + +stmt: + BREAK st { if (!inloop) SYNTAX("break illegal outside of loops"); + $$ = stat1(BREAK, NIL); } + | CONTINUE st { if (!inloop) SYNTAX("continue illegal outside of loops"); + $$ = stat1(CONTINUE, NIL); } + | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st + { $$ = stat2(DO, $3, notnull($7)); } + | EXIT pattern st { $$ = stat1(EXIT, $2); } + | EXIT st { $$ = stat1(EXIT, NIL); } + | for + | if stmt else stmt { $$ = stat3(IF, $1, $2, $4); } + | if stmt { $$ = stat3(IF, $1, $2, NIL); } + | lbrace stmtlist rbrace { $$ = $2; } + | NEXT st { if (infunc) + SYNTAX("next is illegal inside a function"); + $$ = stat1(NEXT, NIL); } + | NEXTFILE st { if (infunc) + SYNTAX("nextfile is illegal inside a function"); + $$ = stat1(NEXTFILE, NIL); } + | RETURN pattern st { $$ = stat1(RETURN, $2); } + | RETURN st { $$ = stat1(RETURN, NIL); } + | simple_stmt st + | while {inloop++;} stmt { --inloop; $$ = stat2(WHILE, $1, $3); } + | ';' opt_nl { $$ = 0; } + ; + +stmtlist: + stmt + | stmtlist stmt { $$ = linkum($1, $2); } + ; + +subop: + SUB | GSUB + ; + +term: + term '/' ASGNOP term { $$ = op2(DIVEQ, $1, $4); } + | term '+' term { $$ = op2(ADD, $1, $3); } + | term '-' term { $$ = op2(MINUS, $1, $3); } + | term '*' term { $$ = op2(MULT, $1, $3); } + | term '/' term { $$ = op2(DIVIDE, $1, $3); } + | term '%' term { $$ = op2(MOD, $1, $3); } + | term POWER term { $$ = op2(POWER, $1, $3); } + | '-' term %prec UMINUS { $$ = op1(UMINUS, $2); } + | '+' term %prec UMINUS { $$ = $2; } + | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); } + | BLTIN '(' ')' { $$ = op2(BLTIN, itonp($1), rectonode()); } + | BLTIN '(' patlist ')' { $$ = op2(BLTIN, itonp($1), $3); } + | BLTIN { $$ = op2(BLTIN, itonp($1), rectonode()); } + | CALL '(' ')' { $$ = op2(CALL, celltonode($1,CVAR), NIL); } + | CALL '(' patlist ')' { $$ = op2(CALL, celltonode($1,CVAR), $3); } + | CLOSE term { $$ = op1(CLOSE, $2); } + | DECR var { $$ = op1(PREDECR, $2); } + | INCR var { $$ = op1(PREINCR, $2); } + | var DECR { $$ = op1(POSTDECR, $1); } + | var INCR { $$ = op1(POSTINCR, $1); } + | GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); } + | GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); } + | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); } + | GETLINE { $$ = op3(GETLINE, NIL, NIL, NIL); } + | INDEX '(' pattern comma pattern ')' + { $$ = op2(INDEX, $3, $5); } + | INDEX '(' pattern comma reg_expr ')' + { SYNTAX("index() doesn't permit regular expressions"); + $$ = op2(INDEX, $3, (Node*)$5); } + | '(' pattern ')' { $$ = $2; } + | MATCHFCN '(' pattern comma reg_expr ')' + { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); } + | MATCHFCN '(' pattern comma pattern ')' + { if (constnode($5)) + $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1)); + else + $$ = op3(MATCHFCN, (Node *)1, $3, $5); } + | NUMBER { $$ = celltonode($1, CCON); } + | SPLIT '(' pattern comma varname comma pattern ')' /* string */ + { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); } + | SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */ + { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); } + | SPLIT '(' pattern comma varname ')' + { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */ + | SPRINTF '(' patlist ')' { $$ = op1($1, $3); } + | STRING { $$ = celltonode($1, CCON); } + | subop '(' reg_expr comma pattern ')' + { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); } + | subop '(' pattern comma pattern ')' + { if (constnode($3)) + $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode()); + else + $$ = op4($1, (Node *)1, $3, $5, rectonode()); } + | subop '(' reg_expr comma pattern comma var ')' + { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); } + | subop '(' pattern comma pattern comma var ')' + { if (constnode($3)) + $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7); + else + $$ = op4($1, (Node *)1, $3, $5, $7); } + | SUBSTR '(' pattern comma pattern comma pattern ')' + { $$ = op3(SUBSTR, $3, $5, $7); } + | SUBSTR '(' pattern comma pattern ')' + { $$ = op3(SUBSTR, $3, $5, NIL); } + | var + ; + +var: + varname + | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); } + | IVAR { $$ = op1(INDIRECT, celltonode($1, CVAR)); } + | INDIRECT term { $$ = op1(INDIRECT, $2); } + ; + +varlist: + /* nothing */ { arglist = $$ = 0; } + | VAR { arglist = $$ = celltonode($1,CVAR); } + | varlist comma VAR { + checkdup($1, $3); + arglist = $$ = linkum($1,celltonode($3,CVAR)); } + ; + +varname: + VAR { $$ = celltonode($1, CVAR); } + | ARG { $$ = op1(ARG, itonp($1)); } + | VARNF { $$ = op1(VARNF, (Node *) $1); } + ; + + +while: + WHILE '(' pattern rparen { $$ = notnull($3); } + ; + +%% + +void setfname(Cell *p) +{ + if (isarr(p)) + SYNTAX("%s is an array, not a function", p->nval); + else if (isfcn(p)) + SYNTAX("you can't define function %s more than once", p->nval); + curfname = p->nval; +} + +int constnode(Node *p) +{ + return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON; +} + +char *strnode(Node *p) +{ + return ((Cell *)(p->narg[0]))->sval; +} + +Node *notnull(Node *n) +{ + switch (n->nobj) { + case LE: case LT: case EQ: case NE: case GT: case GE: + case BOR: case AND: case NOT: + return n; + default: + return op2(NE, n, nullnode); + } +} + +void checkdup(Node *vl, Cell *cp) /* check if name already in list */ +{ + char *s = cp->nval; + for ( ; vl; vl = vl->nnext) { + if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) { + SYNTAX("duplicate argument %s", s); + break; + } + } +} diff --git a/awk/b.c b/awk/b.c @@ -0,0 +1,958 @@ +/* $OpenBSD: b.c,v 1.17 2011/09/28 19:27:18 millert Exp $ */ +/**************************************************************** +Copyright (C) Lucent Technologies 1997 +All Rights Reserved + +Permission to use, copy, modify, and distribute this software and +its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the name Lucent Technologies or any of +its entities not be used in advertising or publicity pertaining +to distribution of the software without specific, written prior +permission. + +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +****************************************************************/ + +/* lasciate ogne speranza, voi ch'intrate. */ + +#define DEBUG + +#include <ctype.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include "awk.h" +#include "ytab.h" + +#define HAT (NCHARS+2) /* matches ^ in regular expr */ + /* NCHARS is 2**n */ +#define MAXLIN 22 + +#define type(v) (v)->nobj /* badly overloaded here */ +#define info(v) (v)->ntype /* badly overloaded here */ +#define left(v) (v)->narg[0] +#define right(v) (v)->narg[1] +#define parent(v) (v)->nnext + +#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: +#define ELEAF case EMPTYRE: /* empty string in regexp */ +#define UNARY case STAR: case PLUS: case QUEST: + +/* encoding in tree Nodes: + leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE): + left is index, right contains value or pointer to value + unary (STAR, PLUS, QUEST): left is child, right is null + binary (CAT, OR): left and right are children + parent contains pointer to parent +*/ + + +int *setvec; +int *tmpset; +int maxsetvec = 0; + +int rtok; /* next token in current re */ +int rlxval; +static uschar *rlxstr; +static uschar *prestr; /* current position in current re */ +static uschar *lastre; /* origin of last re */ + +static int setcnt; +static int poscnt; + +char *patbeg; +int patlen; + +#define NFA 20 /* cache this many dynamic fa's */ +fa *fatab[NFA]; +int nfatab = 0; /* entries in fatab */ + +fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ +{ + int i, use, nuse; + fa *pfa; + static int now = 1; + + if (setvec == 0) { /* first time through any RE */ + maxsetvec = MAXLIN; + setvec = (int *) calloc(maxsetvec, sizeof(int)); + tmpset = (int *) calloc(maxsetvec, sizeof(int)); + if (setvec == 0 || tmpset == 0) + overflo("out of space initializing makedfa"); + } + + if (compile_time) /* a constant for sure */ + return mkdfa(s, anchor); + for (i = 0; i < nfatab; i++) /* is it there already? */ + if (fatab[i]->anchor == anchor + && strcmp((const char *) fatab[i]->restr, s) == 0) { + fatab[i]->use = now++; + return fatab[i]; + } + pfa = mkdfa(s, anchor); + if (nfatab < NFA) { /* room for another */ + fatab[nfatab] = pfa; + fatab[nfatab]->use = now++; + nfatab++; + return pfa; + } + use = fatab[0]->use; /* replace least-recently used */ + nuse = 0; + for (i = 1; i < nfatab; i++) + if (fatab[i]->use < use) { + use = fatab[i]->use; + nuse = i; + } + freefa(fatab[nuse]); + fatab[nuse] = pfa; + pfa->use = now++; + return pfa; +} + +fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ + /* anchor = 1 for anchored matches, else 0 */ +{ + Node *p, *p1; + fa *f; + + p = reparse(s); + p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); + /* put ALL STAR in front of reg. exp. */ + p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); + /* put FINAL after reg. exp. */ + + poscnt = 0; + penter(p1); /* enter parent pointers and leaf indices */ + if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) + overflo("out of space for fa"); + f->accept = poscnt-1; /* penter has computed number of positions in re */ + cfoll(f, p1); /* set up follow sets */ + freetr(p1); + if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL) + overflo("out of space in makedfa"); + if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) + overflo("out of space in makedfa"); + *f->posns[1] = 0; + f->initstat = makeinit(f, anchor); + f->anchor = anchor; + f->restr = (uschar *) tostring(s); + return f; +} + +int makeinit(fa *f, int anchor) +{ + int i, k; + + f->curstat = 2; + f->out[2] = 0; + f->reset = 0; + k = *(f->re[0].lfollow); + xfree(f->posns[2]); + if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) + overflo("out of space in makeinit"); + for (i=0; i <= k; i++) { + (f->posns[2])[i] = (f->re[0].lfollow)[i]; + } + if ((f->posns[2])[1] == f->accept) + f->out[2] = 1; + for (i=0; i < NCHARS; i++) + f->gototab[2][i] = 0; + f->curstat = cgoto(f, 2, HAT); + if (anchor) { + *f->posns[2] = k-1; /* leave out position 0 */ + for (i=0; i < k; i++) { + (f->posns[0])[i] = (f->posns[2])[i]; + } + + f->out[0] = f->out[2]; + if (f->curstat != 2) + --(*f->posns[f->curstat]); + } + return f->curstat; +} + +void penter(Node *p) /* set up parent pointers and leaf indices */ +{ + switch (type(p)) { + ELEAF + LEAF + info(p) = poscnt; + poscnt++; + break; + UNARY + penter(left(p)); + parent(left(p)) = p; + break; + case CAT: + case OR: + penter(left(p)); + penter(right(p)); + parent(left(p)) = p; + parent(right(p)) = p; + break; + default: /* can't happen */ + FATAL("can't happen: unknown type %d in penter", type(p)); + break; + } +} + +void freetr(Node *p) /* free parse tree */ +{ + switch (type(p)) { + ELEAF + LEAF + xfree(p); + break; + UNARY + freetr(left(p)); + xfree(p); + break; + case CAT: + case OR: + freetr(left(p)); + freetr(right(p)); + xfree(p); + break; + default: /* can't happen */ + FATAL("can't happen: unknown type %d in freetr", type(p)); + break; + } +} + +/* in the parsing of regular expressions, metacharacters like . have */ +/* to be seen literally; \056 is not a metacharacter. */ + +int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */ +{ /* only pick up one 8-bit byte (2 chars) */ + uschar *p; + int n = 0; + int i; + + for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { + if (isdigit(*p)) + n = 16 * n + *p - '0'; + else if (*p >= 'a' && *p <= 'f') + n = 16 * n + *p - 'a' + 10; + else if (*p >= 'A' && *p <= 'F') + n = 16 * n + *p - 'A' + 10; + } + *pp = (uschar *) p; + return n; +} + +#define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ + +int quoted(uschar **pp) /* pick up next thing after a \\ */ + /* and increment *pp */ +{ + uschar *p = *pp; + int c; + + if ((c = *p++) == 't') + c = '\t'; + else if (c == 'n') + c = '\n'; + else if (c == 'f') + c = '\f'; + else if (c == 'r') + c = '\r'; + else if (c == 'b') + c = '\b'; + else if (c == '\\') + c = '\\'; + else if (c == 'x') { /* hexadecimal goo follows */ + c = hexstr(&p); /* this adds a null if number is invalid */ + } else if (isoctdigit(c)) { /* \d \dd \ddd */ + int n = c - '0'; + if (isoctdigit(*p)) { + n = 8 * n + *p++ - '0'; + if (isoctdigit(*p)) + n = 8 * n + *p++ - '0'; + } + c = n; + } /* else */ + /* c = c; */ + *pp = p; + return c; +} + +char *cclenter(const char *argp) /* add a character class */ +{ + int i, c, c2; + uschar *p = (uschar *) argp; + uschar *op, *bp; + static uschar *buf = 0; + static int bufsz = 100; + + op = p; + if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) + FATAL("out of space for character class [%.10s...] 1", p); + bp = buf; + for (i = 0; (c = *p++) != 0; ) { + if (c == '\\') { + c = quoted(&p); + } else if (c == '-' && i > 0 && bp[-1] != 0) { + if (*p != 0) { + c = bp[-1]; + c2 = *p++; + if (c2 == '\\') + c2 = quoted(&p); + if (c > c2) { /* empty; ignore */ + bp--; + i--; + continue; + } + while (c < c2) { + if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1")) + FATAL("out of space for character class [%.10s...] 2", p); + *bp++ = ++c; + i++; + } + continue; + } + } + if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2")) + FATAL("out of space for character class [%.10s...] 3", p); + *bp++ = c; + i++; + } + *bp = 0; + dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); + xfree(op); + return (char *) tostring((char *) buf); +} + +void overflo(const char *s) +{ + FATAL("regular expression too big: %.30s...", s); +} + +void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ +{ + int i; + int *p; + + switch (type(v)) { + ELEAF + LEAF + f->re[info(v)].ltype = type(v); + f->re[info(v)].lval.np = right(v); + while (f->accept >= maxsetvec) { /* guessing here! */ + maxsetvec *= 4; + setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); + tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); + if (setvec == 0 || tmpset == 0) + overflo("out of space in cfoll()"); + } + for (i = 0; i <= f->accept; i++) + setvec[i] = 0; + setcnt = 0; + follow(v); /* computes setvec and setcnt */ + if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) + overflo("out of space building follow set"); + f->re[info(v)].lfollow = p; + *p = setcnt; + for (i = f->accept; i >= 0; i--) + if (setvec[i] == 1) + *++p = i; + break; + UNARY + cfoll(f,left(v)); + break; + case CAT: + case OR: + cfoll(f,left(v)); + cfoll(f,right(v)); + break; + default: /* can't happen */ + FATAL("can't happen: unknown type %d in cfoll", type(v)); + } +} + +int first(Node *p) /* collects initially active leaves of p into setvec */ + /* returns 0 if p matches empty string */ +{ + int b, lp; + + switch (type(p)) { + ELEAF + LEAF + lp = info(p); /* look for high-water mark of subscripts */ + while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ + maxsetvec *= 4; + setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); + tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); + if (setvec == 0 || tmpset == 0) + overflo("out of space in first()"); + } + if (type(p) == EMPTYRE) { + setvec[lp] = 0; + return(0); + } + if (setvec[lp] != 1) { + setvec[lp] = 1; + setcnt++; + } + if (type(p) == CCL && (*(char *) right(p)) == '\0') + return(0); /* empty CCL */ + else return(1); + case PLUS: + if (first(left(p)) == 0) return(0); + return(1); + case STAR: + case QUEST: + first(left(p)); + return(0); + case CAT: + if (first(left(p)) == 0 && first(right(p)) == 0) return(0); + return(1); + case OR: + b = first(right(p)); + if (first(left(p)) == 0 || b == 0) return(0); + return(1); + } + FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ + return(-1); +} + +void follow(Node *v) /* collects leaves that can follow v into setvec */ +{ + Node *p; + + if (type(v) == FINAL) + return; + p = parent(v); + switch (type(p)) { + case STAR: + case PLUS: + first(v); + follow(p); + return; + + case OR: + case QUEST: + follow(p); + return; + + case CAT: + if (v == left(p)) { /* v is left child of p */ + if (first(right(p)) == 0) { + follow(p); + return; + } + } else /* v is right child */ + follow(p); + return; + } +} + +int member(int c, const char *sarg) /* is c in s? */ +{ + uschar *s = (uschar *) sarg; + + while (*s) + if (c == *s++) + return(1); + return(0); +} + +int match(fa *f, const char *p0) /* shortest match ? */ +{ + int s, ns; + uschar *p = (uschar *) p0; + + s = f->reset ? makeinit(f,0) : f->initstat; + if (f->out[s]) + return(1); + do { + /* assert(*p < NCHARS); */ + if ((ns = f->gototab[s][*p]) != 0) + s = ns; + else + s = cgoto(f, s, *p); + if (f->out[s]) + return(1); + } while (*p++ != 0); + return(0); +} + +int pmatch(fa *f, const char *p0) /* longest match, for sub */ +{ + int s, ns; + uschar *p = (uschar *) p0; + uschar *q; + int i, k; + + /* s = f->reset ? makeinit(f,1) : f->initstat; */ + if (f->reset) { + f->initstat = s = makeinit(f,1); + } else { + s = f->initstat; + } + patbeg = (char *) p; + patlen = -1; + do { + q = p; + do { + if (f->out[s]) /* final state */ + patlen = q-p; + /* assert(*q < NCHARS); */ + if ((ns = f->gototab[s][*q]) != 0) + s = ns; + else + s = cgoto(f, s, *q); + if (s == 1) { /* no transition */ + if (patlen >= 0) { + patbeg = (char *) p; + return(1); + } + else + goto nextin; /* no match */ + } + } while (*q++ != 0); + if (f->out[s]) + patlen = q-p-1; /* don't count $ */ + if (patlen >= 0) { + patbeg = (char *) p; + return(1); + } + nextin: + s = 2; + if (f->reset) { + for (i = 2; i <= f->curstat; i++) + xfree(f->posns[i]); + k = *f->posns[0]; + if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) + overflo("out of space in pmatch"); + for (i = 0; i <= k; i++) + (f->posns[2])[i] = (f->posns[0])[i]; + f->initstat = f->curstat = 2; + f->out[2] = f->out[0]; + for (i = 0; i < NCHARS; i++) + f->gototab[2][i] = 0; + } + } while (*p++ != 0); + return (0); +} + +int nematch(fa *f, const char *p0) /* non-empty match, for sub */ +{ + int s, ns; + uschar *p = (uschar *) p0; + uschar *q; + int i, k; + + /* s = f->reset ? makeinit(f,1) : f->initstat; */ + if (f->reset) { + f->initstat = s = makeinit(f,1); + } else { + s = f->initstat; + } + patlen = -1; + while (*p) { + q = p; + do { + if (f->out[s]) /* final state */ + patlen = q-p; + /* assert(*q < NCHARS); */ + if ((ns = f->gototab[s][*q]) != 0) + s = ns; + else + s = cgoto(f, s, *q); + if (s == 1) { /* no transition */ + if (patlen > 0) { + patbeg = (char *) p; + return(1); + } else + goto nnextin; /* no nonempty match */ + } + } while (*q++ != 0); + if (f->out[s]) + patlen = q-p-1; /* don't count $ */ + if (patlen > 0 ) { + patbeg = (char *) p; + return(1); + } + nnextin: + s = 2; + if (f->reset) { + for (i = 2; i <= f->curstat; i++) + xfree(f->posns[i]); + k = *f->posns[0]; + if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) + overflo("out of state space"); + for (i = 0; i <= k; i++) + (f->posns[2])[i] = (f->posns[0])[i]; + f->initstat = f->curstat = 2; + f->out[2] = f->out[0]; + for (i = 0; i < NCHARS; i++) + f->gototab[2][i] = 0; + } + p++; + } + return (0); +} + +Node *reparse(const char *p) /* parses regular expression pointed to by p */ +{ /* uses relex() to scan regular expression */ + Node *np; + + dprintf( ("reparse <%s>\n", p) ); + lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ + rtok = relex(); + /* GNU compatibility: an empty regexp matches anything */ + if (rtok == '\0') { + /* FATAL("empty regular expression"); previous */ + return(op2(EMPTYRE, NIL, NIL)); + } + np = regexp(); + if (rtok != '\0') + FATAL("syntax error in regular expression %s at %s", lastre, prestr); + return(np); +} + +Node *regexp(void) /* top-level parse of reg expr */ +{ + return (alt(concat(primary()))); +} + +Node *primary(void) +{ + Node *np; + + switch (rtok) { + case CHAR: + np = op2(CHAR, NIL, itonp(rlxval)); + rtok = relex(); + return (unary(np)); + case ALL: + rtok = relex(); + return (unary(op2(ALL, NIL, NIL))); + case EMPTYRE: + rtok = relex(); + return (unary(op2(ALL, NIL, NIL))); + case DOT: + rtok = relex(); + return (unary(op2(DOT, NIL, NIL))); + case CCL: + np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); + rtok = relex(); + return (unary(np)); + case NCCL: + np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); + rtok = relex(); + return (unary(np)); + case '^': + rtok = relex(); + return (unary(op2(CHAR, NIL, itonp(HAT)))); + case '$': + rtok = relex(); + return (unary(op2(CHAR, NIL, NIL))); + case '(': + rtok = relex(); + if (rtok == ')') { /* special pleading for () */ + rtok = relex(); + return unary(op2(CCL, NIL, (Node *) tostring(""))); + } + np = regexp(); + if (rtok == ')') { + rtok = relex(); + return (unary(np)); + } + else + FATAL("syntax error in regular expression %s at %s", lastre, prestr); + default: + FATAL("illegal primary in regular expression %s at %s", lastre, prestr); + } + return 0; /*NOTREACHED*/ +} + +Node *concat(Node *np) +{ + switch (rtok) { + case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(': + return (concat(op2(CAT, np, primary()))); + } + return (np); +} + +Node *alt(Node *np) +{ + if (rtok == OR) { + rtok = relex(); + return (alt(op2(OR, np, concat(primary())))); + } + return (np); +} + +Node *unary(Node *np) +{ + switch (rtok) { + case STAR: + rtok = relex(); + return (unary(op2(STAR, np, NIL))); + case PLUS: + rtok = relex(); + return (unary(op2(PLUS, np, NIL))); + case QUEST: + rtok = relex(); + return (unary(op2(QUEST, np, NIL))); + default: + return (np); + } +} + +/* + * Character class definitions conformant to the POSIX locale as + * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source + * and operating character sets are both ASCII (ISO646) or supersets + * thereof. + * + * Note that to avoid overflowing the temporary buffer used in + * relex(), the expanded character class (prior to range expansion) + * must be less than twice the size of their full name. + */ + +/* Because isblank doesn't show up in any of the header files on any + * system i use, it's defined here. if some other locale has a richer + * definition of "blank", define HAS_ISBLANK and provide your own + * version. + * the parentheses here are an attempt to find a path through the maze + * of macro definition and/or function and/or version provided. thanks + * to nelson beebe for the suggestion; let's see if it works everywhere. + */ + +#ifndef HAS_ISBLANK + +int (xisblank)(int c) +{ + return c==' ' || c=='\t'; +} + +#endif + +struct charclass { + const char *cc_name; + int cc_namelen; + int (*cc_func)(int); +} charclasses[] = { + { "alnum", 5, isalnum }, + { "alpha", 5, isalpha }, +#ifndef HAS_ISBLANK + { "blank", 5, isspace }, /* was isblank */ +#else + { "blank", 5, isblank }, +#endif + { "cntrl", 5, iscntrl }, + { "digit", 5, isdigit }, + { "graph", 5, isgraph }, + { "lower", 5, islower }, + { "print", 5, isprint }, + { "punct", 5, ispunct }, + { "space", 5, isspace }, + { "upper", 5, isupper }, + { "xdigit", 6, isxdigit }, + { NULL, 0, NULL }, +}; + + +int relex(void) /* lexical analyzer for reparse */ +{ + int c, n; + int cflag; + static uschar *buf = 0; + static int bufsz = 100; + uschar *bp; + struct charclass *cc; + int i; + + switch (c = *prestr++) { + case '|': return OR; + case '*': return STAR; + case '+': return PLUS; + case '?': return QUEST; + case '.': return DOT; + case '\0': prestr--; return '\0'; + case '^': + case '$': + case '(': + case ')': + return c; + case '\\': + rlxval = quoted(&prestr); + return CHAR; + default: + rlxval = c; + return CHAR; + case '[': + if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) + FATAL("out of space in reg expr %.10s..", lastre); + bp = buf; + if (*prestr == '^') { + cflag = 1; + prestr++; + } + else + cflag = 0; + n = 2 * strlen((const char *) prestr)+1; + if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1")) + FATAL("out of space for reg expr %.10s...", lastre); + for (; ; ) { + if ((c = *prestr++) == '\\') { + *bp++ = '\\'; + if ((c = *prestr++) == '\0') + FATAL("nonterminated character class %.20s...", lastre); + *bp++ = c; + /* } else if (c == '\n') { */ + /* FATAL("newline in character class %.20s...", lastre); */ + } else if (c == '[' && *prestr == ':') { + /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */ + for (cc = charclasses; cc->cc_name; cc++) + if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0) + break; + if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && + prestr[2 + cc->cc_namelen] == ']') { + prestr += cc->cc_namelen + 3; + for (i = 0; i < NCHARS; i++) { + if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2")) + FATAL("out of space for reg expr %.10s...", lastre); + if (cc->cc_func(i)) { + *bp++ = i; + n++; + } + } + } else + *bp++ = c; + } else if (c == '\0') { + FATAL("nonterminated character class %.20s", lastre); + } else if (bp == buf) { /* 1st char is special */ + *bp++ = c; + } else if (c == ']') { + *bp++ = 0; + rlxstr = (uschar *) tostring((char *) buf); + if (cflag == 0) + return CCL; + else + return NCCL; + } else + *bp++ = c; + } + } +} + +int cgoto(fa *f, int s, int c) +{ + int i, j, k; + int *p, *q; + + assert(c == HAT || c < NCHARS); + while (f->accept >= maxsetvec) { /* guessing here! */ + maxsetvec *= 4; + setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); + tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); + if (setvec == 0 || tmpset == 0) + overflo("out of space in cgoto()"); + } + for (i = 0; i <= f->accept; i++) + setvec[i] = 0; + setcnt = 0; + /* compute positions of gototab[s,c] into setvec */ + p = f->posns[s]; + for (i = 1; i <= *p; i++) { + if ((k = f->re[p[i]].ltype) != FINAL) { + if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) + || (k == DOT && c != 0 && c != HAT) + || (k == ALL && c != 0) + || (k == EMPTYRE && c != 0) + || (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) + || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { + q = f->re[p[i]].lfollow; + for (j = 1; j <= *q; j++) { + if (q[j] >= maxsetvec) { + maxsetvec *= 4; + setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); + tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); + if (setvec == 0 || tmpset == 0) + overflo("cgoto overflow"); + } + if (setvec[q[j]] == 0) { + setcnt++; + setvec[q[j]] = 1; + } + } + } + } + } + /* determine if setvec is a previous state */ + tmpset[0] = setcnt; + j = 1; + for (i = f->accept; i >= 0; i--) + if (setvec[i]) { + tmpset[j++] = i; + } + /* tmpset == previous state? */ + for (i = 1; i <= f->curstat; i++) { + p = f->posns[i]; + if ((k = tmpset[0]) != p[0]) + goto different; + for (j = 1; j <= k; j++) + if (tmpset[j] != p[j]) + goto different; + /* setvec is state i */ + f->gototab[s][c] = i; + return i; + different:; + } + + /* add tmpset to current set of states */ + if (f->curstat >= NSTATES-1) { + f->curstat = 2; + f->reset = 1; + for (i = 2; i < NSTATES; i++) + xfree(f->posns[i]); + } else + ++(f->curstat); + for (i = 0; i < NCHARS; i++) + f->gototab[f->curstat][i] = 0; + xfree(f->posns[f->curstat]); + if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) + overflo("out of space in cgoto"); + + f->posns[f->curstat] = p; + f->gototab[s][c] = f->curstat; + for (i = 0; i <= setcnt; i++) + p[i] = tmpset[i]; + if (setvec[f->accept]) + f->out[f->curstat] = 1; + else + f->out[f->curstat] = 0; + return f->curstat; +} + + +void freefa(fa *f) /* free a finite automaton */ +{ + int i; + + if (f == NULL) + return; + for (i = 0; i <= f->curstat; i++) + xfree(f->posns[i]); + for (i = 0; i <= f->accept; i++) { + xfree(f->re[i].lfollow); + if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) + xfree((f->re[i].lval.np)); + } + xfree(f->restr); + xfree(f); +} diff --git a/awk/lex.c b/awk/lex.c @@ -0,0 +1,597 @@ +/* $OpenBSD: lex.c,v 1.12 2011/09/28 19:27:18 millert Exp $ */ +/**************************************************************** +Copyright (C) Lucent Technologies 1997 +All Rights Reserved + +Permission to use, copy, modify, and distribute this software and +its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the name Lucent Technologies or any of +its entities not be used in advertising or publicity pertaining +to distribution of the software without specific, written prior +permission. + +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +****************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "awk.h" +#include "ytab.h" + +extern YYSTYPE yylval; +extern int infunc; + +int lineno = 1; +int bracecnt = 0; +int brackcnt = 0; +int parencnt = 0; + +typedef struct Keyword { + const char *word; + int sub; + int type; +} Keyword; + +Keyword keywords[] ={ /* keep sorted: binary searched */ + { "BEGIN", XBEGIN, XBEGIN }, + { "END", XEND, XEND }, + { "NF", VARNF, VARNF }, + { "and", FAND, BLTIN }, + { "atan2", FATAN, BLTIN }, + { "break", BREAK, BREAK }, + { "close", CLOSE, CLOSE }, + { "compl", FCOMPL, BLTIN }, + { "continue", CONTINUE, CONTINUE }, + { "cos", FCOS, BLTIN }, + { "delete", DELETE, DELETE }, + { "do", DO, DO }, + { "else", ELSE, ELSE }, + { "exit", EXIT, EXIT }, + { "exp", FEXP, BLTIN }, + { "fflush", FFLUSH, BLTIN }, + { "for", FOR, FOR }, + { "func", FUNC, FUNC }, + { "function", FUNC, FUNC }, + { "getline", GETLINE, GETLINE }, + { "gsub", GSUB, GSUB }, + { "if", IF, IF }, + { "in", IN, IN }, + { "index", INDEX, INDEX }, + { "int", FINT, BLTIN }, + { "length", FLENGTH, BLTIN }, + { "log", FLOG, BLTIN }, + { "lshift", FLSHIFT, BLTIN }, + { "match", MATCHFCN, MATCHFCN }, + { "next", NEXT, NEXT }, + { "nextfile", NEXTFILE, NEXTFILE }, + { "or", FFOR, BLTIN }, + { "print", PRINT, PRINT }, + { "printf", PRINTF, PRINTF }, + { "rand", FRAND, BLTIN }, + { "return", RETURN, RETURN }, + { "rshift", FRSHIFT, BLTIN }, + { "sin", FSIN, BLTIN }, + { "split", SPLIT, SPLIT }, + { "sprintf", SPRINTF, SPRINTF }, + { "sqrt", FSQRT, BLTIN }, + { "srand", FSRAND, BLTIN }, + { "sub", SUB, SUB }, + { "substr", SUBSTR, SUBSTR }, + { "system", FSYSTEM, BLTIN }, + { "tolower", FTOLOWER, BLTIN }, + { "toupper", FTOUPPER, BLTIN }, + { "while", WHILE, WHILE }, + { "xor", FXOR, BLTIN }, +}; + +#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); } + +int peek(void); +int gettok(char **, int *); +int binsearch(char *, Keyword *, int); + +int peek(void) +{ + int c = input(); + unput(c); + return c; +} + +int gettok(char **pbuf, int *psz) /* get next input token */ +{ + int c, retc; + char *buf = *pbuf; + int sz = *psz; + char *bp = buf; + + c = input(); + if (c == 0) + return 0; + buf[0] = c; + buf[1] = 0; + if (!isalnum(c) && c != '.' && c != '_') + return c; + + *bp++ = c; + if (isalpha(c) || c == '_') { /* it's a varname */ + for ( ; (c = input()) != 0; ) { + if (bp-buf >= sz) + if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) + FATAL( "out of space for name %.10s...", buf ); + if (isalnum(c) || c == '_') + *bp++ = c; + else { + *bp = 0; + unput(c); + break; + } + } + *bp = 0; + retc = 'a'; /* alphanumeric */ + } else { /* maybe it's a number, but could be . */ + char *rem; + /* read input until can't be a number */ + for ( ; (c = input()) != 0; ) { + if (bp-buf >= sz) + if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) + FATAL( "out of space for number %.10s...", buf ); + if (isdigit(c) || c == 'e' || c == 'E' + || c == '.' || c == '+' || c == '-') + *bp++ = c; + else { + unput(c); + break; + } + } + *bp = 0; + strtod(buf, &rem); /* parse the number */ + if (rem == buf) { /* it wasn't a valid number at all */ + buf[1] = 0; /* return one character as token */ + retc = buf[0]; /* character is its own type */ + unputstr(rem+1); /* put rest back for later */ + } else { /* some prefix was a number */ + unputstr(rem); /* put rest back for later */ + rem[0] = 0; /* truncate buf after number part */ + retc = '0'; /* type is number */ + } + } + *pbuf = buf; + *psz = sz; + return retc; +} + +int word(char *); +int string(void); +int regexpr(void); +int sc = 0; /* 1 => return a } right now */ +int reg = 0; /* 1 => return a REGEXPR now */ + +int yylex(void) +{ + int c; + static char *buf = 0; + static int bufsize = 5; /* BUG: setting this small causes core dump! */ + + if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL) + FATAL( "out of space in yylex" ); + if (sc) { + sc = 0; + RET('}'); + } + if (reg) { + reg = 0; + return regexpr(); + } + for (;;) { + c = gettok(&buf, &bufsize); + if (c == 0) + return 0; + if (isalpha(c) || c == '_') + return word(buf); + if (isdigit(c)) { + yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab); + /* should this also have STR set? */ + RET(NUMBER); + } + + yylval.i = c; + switch (c) { + case '\n': /* {EOL} */ + RET(NL); + case '\r': /* assume \n is coming */ + case ' ': /* {WS}+ */ + case '\t': + break; + case '#': /* #.* strip comments */ + while ((c = input()) != '\n' && c != 0) + ; + unput(c); + break; + case ';': + RET(';'); + case '\\': + if (peek() == '\n') { + input(); + } else if (peek() == '\r') { + input(); input(); /* \n */ + lineno++; + } else { + RET(c); + } + break; + case '&': + if (peek() == '&') { + input(); RET(AND); + } else + RET('&'); + case '|': + if (peek() == '|') { + input(); RET(BOR); + } else + RET('|'); + case '!': + if (peek() == '=') { + input(); yylval.i = NE; RET(NE); + } else if (peek() == '~') { + input(); yylval.i = NOTMATCH; RET(MATCHOP); + } else + RET(NOT); + case '~': + yylval.i = MATCH; + RET(MATCHOP); + case '<': + if (peek() == '=') { + input(); yylval.i = LE; RET(LE); + } else { + yylval.i = LT; RET(LT); + } + case '=': + if (peek() == '=') { + input(); yylval.i = EQ; RET(EQ); + } else { + yylval.i = ASSIGN; RET(ASGNOP); + } + case '>': + if (peek() == '=') { + input(); yylval.i = GE; RET(GE); + } else if (peek() == '>') { + input(); yylval.i = APPEND; RET(APPEND); + } else { + yylval.i = GT; RET(GT); + } + case '+': + if (peek() == '+') { + input(); yylval.i = INCR; RET(INCR); + } else if (peek() == '=') { + input(); yylval.i = ADDEQ; RET(ASGNOP); + } else + RET('+'); + case '-': + if (peek() == '-') { + input(); yylval.i = DECR; RET(DECR); + } else if (peek() == '=') { + input(); yylval.i = SUBEQ; RET(ASGNOP); + } else + RET('-'); + case '*': + if (peek() == '=') { /* *= */ + input(); yylval.i = MULTEQ; RET(ASGNOP); + } else if (peek() == '*') { /* ** or **= */ + input(); /* eat 2nd * */ + if (peek() == '=') { + input(); yylval.i = POWEQ; RET(ASGNOP); + } else { + RET(POWER); + } + } else + RET('*'); + case '/': + RET('/'); + case '%': + if (peek() == '=') { + input(); yylval.i = MODEQ; RET(ASGNOP); + } else + RET('%'); + case '^': + if (peek() == '=') { + input(); yylval.i = POWEQ; RET(ASGNOP); + } else + RET(POWER); + + case '$': + /* BUG: awkward, if not wrong */ + c = gettok(&buf, &bufsize); + if (isalpha(c)) { + if (strcmp(buf, "NF") == 0) { /* very special */ + unputstr("(NF)"); + RET(INDIRECT); + } + c = peek(); + if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) { + unputstr(buf); + RET(INDIRECT); + } + yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab); + RET(IVAR); + } else if (c == 0) { /* */ + SYNTAX( "unexpected end of input after $" ); + RET(';'); + } else { + unputstr(buf); + RET(INDIRECT); + } + + case '}': + if (--bracecnt < 0) + SYNTAX( "extra }" ); + sc = 1; + RET(';'); + case ']': + if (--brackcnt < 0) + SYNTAX( "extra ]" ); + RET(']'); + case ')': + if (--parencnt < 0) + SYNTAX( "extra )" ); + RET(')'); + case '{': + bracecnt++; + RET('{'); + case '[': + brackcnt++; + RET('['); + case '(': + parencnt++; + RET('('); + + case '"': + return string(); /* BUG: should be like tran.c ? */ + + default: + RET(c); + } + } +} + +int string(void) +{ + int c, n; + char *s, *bp; + static char *buf = 0; + static int bufsz = 500; + + if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) + FATAL("out of space for strings"); + for (bp = buf; (c = input()) != '"'; ) { + if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string")) + FATAL("out of space for string %.10s...", buf); + switch (c) { + case '\n': + case '\r': + case 0: + SYNTAX( "non-terminated string %.10s...", buf ); + lineno++; + if (c == 0) /* hopeless */ + FATAL( "giving up" ); + break; + case '\\': + c = input(); + switch (c) { + case '"': *bp++ = '"'; break; + case 'n': *bp++ = '\n'; break; + case 't': *bp++ = '\t'; break; + case 'f': *bp++ = '\f'; break; + case 'r': *bp++ = '\r'; break; + case 'b': *bp++ = '\b'; break; + case 'v': *bp++ = '\v'; break; + case 'a': *bp++ = '\007'; break; + case '\\': *bp++ = '\\'; break; + + case '0': case '1': case '2': /* octal: \d \dd \ddd */ + case '3': case '4': case '5': case '6': case '7': + n = c - '0'; + if ((c = peek()) >= '0' && c < '8') { + n = 8 * n + input() - '0'; + if ((c = peek()) >= '0' && c < '8') + n = 8 * n + input() - '0'; + } + *bp++ = n; + break; + + case 'x': /* hex \x0-9a-fA-F + */ + { char xbuf[100], *px; + for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) { + if (isdigit(c) + || (c >= 'a' && c <= 'f') + || (c >= 'A' && c <= 'F')) + *px++ = c; + else + break; + } + *px = 0; + unput(c); + sscanf(xbuf, "%x", (unsigned int *) &n); + *bp++ = n; + break; + } + + default: + *bp++ = c; + break; + } + break; + default: + *bp++ = c; + break; + } + } + *bp = 0; + s = tostring(buf); + *bp++ = ' '; *bp++ = 0; + yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab); + RET(STRING); +} + + +int binsearch(char *w, Keyword *kp, int n) +{ + int cond, low, mid, high; + + low = 0; + high = n - 1; + while (low <= high) { + mid = (low + high) / 2; + if ((cond = strcmp(w, kp[mid].word)) < 0) + high = mid - 1; + else if (cond > 0) + low = mid + 1; + else + return mid; + } + return -1; +} + +int word(char *w) +{ + Keyword *kp; + int c, n; + + n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0])); +/* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */ + kp = keywords + n; + if (n != -1) { /* found in table */ + yylval.i = kp->sub; + switch (kp->type) { /* special handling */ + case BLTIN: + if (kp->sub == FSYSTEM && safe) + SYNTAX( "system is unsafe" ); + RET(kp->type); + case FUNC: + if (infunc) + SYNTAX( "illegal nested function" ); + RET(kp->type); + case RETURN: + if (!infunc) + SYNTAX( "return not in function" ); + RET(kp->type); + case VARNF: + yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab); + RET(VARNF); + default: + RET(kp->type); + } + } + c = peek(); /* look for '(' */ + if (c != '(' && infunc && (n=isarg(w)) >= 0) { + yylval.i = n; + RET(ARG); + } else { + yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab); + if (c == '(') { + RET(CALL); + } else { + RET(VAR); + } + } +} + +void startreg(void) /* next call to yylex will return a regular expression */ +{ + reg = 1; +} + +int regexpr(void) +{ + int c, openclass = 0; + static char *buf = 0; + static int bufsz = 500; + char *bp; + + if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) + FATAL("out of space for rex expr"); + bp = buf; + for ( ; ((c = input()) != '/' || openclass == 1) && c != 0; ) { + if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr")) + FATAL("out of space for reg expr %.10s...", buf); + if (c == '\n') { + SYNTAX( "newline in regular expression %.10s...", buf ); + unput('\n'); + break; + } else if (c == '\\') { + *bp++ = '\\'; + *bp++ = input(); + } else { + if (c == '[') + openclass = 1; + else if (c == ']') + openclass = 0; + *bp++ = c; + } + } + *bp = 0; + if (c == 0) + SYNTAX("non-terminated regular expression %.10s...", buf); + yylval.s = tostring(buf); + unput('/'); + RET(REGEXPR); +} + +/* low-level lexical stuff, sort of inherited from lex */ + +char ebuf[300]; +char *ep = ebuf; +char yysbuf[100]; /* pushback buffer */ +char *yysptr = yysbuf; +FILE *yyin = 0; + +int input(void) /* get next lexical input character */ +{ + int c; + extern char *lexprog; + + if (yysptr > yysbuf) + c = (uschar)*--yysptr; + else if (lexprog != NULL) { /* awk '...' */ + if ((c = (uschar)*lexprog) != 0) + lexprog++; + } else /* awk -f ... */ + c = pgetc(); + if (c == '\n') + lineno++; + else if (c == EOF) + c = 0; + if (ep >= ebuf + sizeof ebuf) + ep = ebuf; + return *ep++ = c; +} + +void unput(int c) /* put lexical character back on input */ +{ + if (c == '\n') + lineno--; + if (yysptr >= yysbuf + sizeof(yysbuf)) + FATAL("pushed back too much: %.20s...", yysbuf); + *yysptr++ = c; + if (--ep < ebuf) + ep = ebuf + sizeof(ebuf) - 1; +} + +void unputstr(const char *s) /* put a string back on input */ +{ + int i; + + for (i = strlen(s)-1; i >= 0; i--) + unput(s[i]); +} diff --git a/awk/lib.c b/awk/lib.c @@ -0,0 +1,739 @@ +/* $OpenBSD: lib.c,v 1.20 2011/09/28 19:27:18 millert Exp $ */ +/**************************************************************** +Copyright (C) Lucent Technologies 1997 +All Rights Reserved + +Permission to use, copy, modify, and distribute this software and +its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the name Lucent Technologies or any of +its entities not be used in advertising or publicity pertaining +to distribution of the software without specific, written prior +permission. + +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +****************************************************************/ + +#define DEBUG +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <errno.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdarg.h> +#include "awk.h" +#include "util.h" +#include "ytab.h" + +FILE *infile = NULL; +char *file = ""; +char *record; +int recsize = RECSIZE; +char *fields; +int fieldssize = RECSIZE; + +Cell **fldtab; /* pointers to Cells */ +char inputFS[100] = " "; + +#define MAXFLD 2 +int nfields = MAXFLD; /* last allocated slot for $i */ + +int donefld; /* 1 = implies rec broken into fields */ +int donerec; /* 1 = record is valid (no flds have changed) */ + +int lastfld = 0; /* last used field */ +int argno = 1; /* current input argument number */ +extern Awkfloat *ARGC; + +static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE }; +static Cell dollar1 = { OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE }; + +void recinit(unsigned int n) +{ + if ( (record = (char *) malloc(n)) == NULL + || (fields = (char *) malloc(n+1)) == NULL + || (fldtab = (Cell **) calloc(nfields+1, sizeof(Cell *))) == NULL + || (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL ) + FATAL("out of space for $0 and fields"); + *fldtab[0] = dollar0; + fldtab[0]->sval = record; + fldtab[0]->nval = tostring("0"); + makefields(1, nfields); +} + +void makefields(int n1, int n2) /* create $n1..$n2 inclusive */ +{ + char temp[50]; + int i; + + for (i = n1; i <= n2; i++) { + fldtab[i] = (Cell *) malloc(sizeof (struct Cell)); + if (fldtab[i] == NULL) + FATAL("out of space in makefields %d", i); + *fldtab[i] = dollar1; + snprintf(temp, sizeof temp, "%d", i); + fldtab[i]->nval = tostring(temp); + } +} + +void initgetrec(void) +{ + int i; + char *p; + + for (i = 1; i < *ARGC; i++) { + p = getargv(i); /* find 1st real filename */ + if (p == NULL || *p == '\0') { /* deleted or zapped */ + argno++; + continue; + } + if (!isclvar(p)) { + setsval(lookup("FILENAME", symtab), p); + return; + } + setclvar(p); /* a commandline assignment before filename */ + argno++; + } + infile = stdin; /* no filenames, so use stdin */ +} + +static int firsttime = 1; + +int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */ +{ /* note: cares whether buf == record */ + int c; + char *buf = *pbuf; + uschar saveb0; + int bufsize = *pbufsize, savebufsize = bufsize; + + if (firsttime) { + firsttime = 0; + initgetrec(); + } + dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n", + *RS, *FS, *ARGC, *FILENAME) ); + if (isrecord) { + donefld = 0; + donerec = 1; + } + saveb0 = buf[0]; + buf[0] = 0; + while (argno < *ARGC || infile == stdin) { + dprintf( ("argno=%d, file=|%s|\n", argno, file) ); + if (infile == NULL) { /* have to open a new file */ + file = getargv(argno); + if (file == NULL || *file == '\0') { /* deleted or zapped */ + argno++; + continue; + } + if (isclvar(file)) { /* a var=value arg */ + setclvar(file); + argno++; + continue; + } + *FILENAME = file; + dprintf( ("opening file %s\n", file) ); + if (*file == '-' && *(file+1) == '\0') + infile = stdin; + else if ((infile = fopen(file, "r")) == NULL) + FATAL("can't open file %s", file); + setfval(fnrloc, 0.0); + } + c = readrec(&buf, &bufsize, infile); + if (c != 0 || buf[0] != '\0') { /* normal record */ + if (isrecord) { + if (freeable(fldtab[0])) + xfree(fldtab[0]->sval); + fldtab[0]->sval = buf; /* buf == record */ + fldtab[0]->tval = REC | STR | DONTFREE; + if (is_number(fldtab[0]->sval)) { + fldtab[0]->fval = atof(fldtab[0]->sval); + fldtab[0]->tval |= NUM; + } + } + setfval(nrloc, nrloc->fval+1); + setfval(fnrloc, fnrloc->fval+1); + *pbuf = buf; + *pbufsize = bufsize; + return 1; + } + /* EOF arrived on this file; set up next */ + if (infile != stdin) + fclose(infile); + infile = NULL; + argno++; + } + buf[0] = saveb0; + *pbuf = buf; + *pbufsize = savebufsize; + return 0; /* true end of file */ +} + +void nextfile(void) +{ + if (infile != NULL && infile != stdin) + fclose(infile); + infile = NULL; + argno++; +} + +int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */ +{ + int sep, c; + char *rr, *buf = *pbuf; + int bufsize = *pbufsize; + + if (strlen(*FS) >= sizeof(inputFS)) + FATAL("field separator %.10s... is too long", *FS); + /*fflush(stdout); avoids some buffering problem but makes it 25% slower*/ + strlcpy(inputFS, *FS, sizeof inputFS); /* for subsequent field splitting */ + if ((sep = **RS) == 0) { + sep = '\n'; + while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ + ; + if (c != EOF) + ungetc(c, inf); + } + for (rr = buf; ; ) { + for (; (c=getc(inf)) != sep && c != EOF; ) { + if (rr-buf+1 > bufsize) + if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1")) + FATAL("input record `%.30s...' too long", buf); + *rr++ = c; + } + if (**RS == sep || c == EOF) + break; + if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ + break; + if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2")) + FATAL("input record `%.30s...' too long", buf); + *rr++ = '\n'; + *rr++ = c; + } + if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3")) + FATAL("input record `%.30s...' too long", buf); + *rr = 0; + dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) ); + *pbuf = buf; + *pbufsize = bufsize; + return c == EOF && rr == buf ? 0 : 1; +} + +char *getargv(int n) /* get ARGV[n] */ +{ + Cell *x; + char *s, temp[50]; + extern Array *ARGVtab; + + snprintf(temp, sizeof temp, "%d", n); + if (lookup(temp, ARGVtab) == NULL) + return NULL; + x = setsymtab(temp, "", 0.0, STR, ARGVtab); + s = getsval(x); + dprintf( ("getargv(%d) returns |%s|\n", n, s) ); + return s; +} + +void setclvar(char *s) /* set var=value from s */ +{ + char *p; + Cell *q; + + for (p=s; *p != '='; p++) + ; + *p++ = 0; + p = qstring(p, '\0'); + q = setsymtab(s, p, 0.0, STR, symtab); + setsval(q, p); + if (is_number(q->sval)) { + q->fval = atof(q->sval); + q->tval |= NUM; + } + dprintf( ("command line set %s to |%s|\n", s, p) ); +} + + +void fldbld(void) /* create fields from current record */ +{ + /* this relies on having fields[] the same length as $0 */ + /* the fields are all stored in this one array with \0's */ + /* possibly with a final trailing \0 not associated with any field */ + char *r, *fr, sep; + Cell *p; + int i, j, n; + + if (donefld) + return; + if (!isstr(fldtab[0])) + getsval(fldtab[0]); + r = fldtab[0]->sval; + n = strlen(r); + if (n > fieldssize) { + xfree(fields); + if ((fields = (char *) malloc(n+2)) == NULL) /* possibly 2 final \0s */ + FATAL("out of space for fields in fldbld %d", n); + fieldssize = n; + } + fr = fields; + i = 0; /* number of fields accumulated here */ + strlcpy(inputFS, *FS, sizeof(inputFS)); + if (strlen(inputFS) > 1) { /* it's a regular expression */ + i = refldbld(r, inputFS); + } else if ((sep = *inputFS) == ' ') { /* default whitespace */ + for (i = 0; ; ) { + while (*r == ' ' || *r == '\t' || *r == '\n') + r++; + if (*r == 0) + break; + i++; + if (i > nfields) + growfldtab(i); + if (freeable(fldtab[i])) + xfree(fldtab[i]->sval); + fldtab[i]->sval = fr; + fldtab[i]->tval = FLD | STR | DONTFREE; + do + *fr++ = *r++; + while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0'); + *fr++ = 0; + } + *fr = 0; + } else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */ + for (i = 0; *r != 0; r++) { + char buf[2]; + i++; + if (i > nfields) + growfldtab(i); + if (freeable(fldtab[i])) + xfree(fldtab[i]->sval); + buf[0] = *r; + buf[1] = 0; + fldtab[i]->sval = tostring(buf); + fldtab[i]->tval = FLD | STR; + } + *fr = 0; + } else if (*r != 0) { /* if 0, it's a null field */ + /* subtlecase : if length(FS) == 1 && length(RS > 0) + * \n is NOT a field separator (cf awk book 61,84). + * this variable is tested in the inner while loop. + */ + int rtest = '\n'; /* normal case */ + if (strlen(*RS) > 0) + rtest = '\0'; + for (;;) { + i++; + if (i > nfields) + growfldtab(i); + if (freeable(fldtab[i])) + xfree(fldtab[i]->sval); + fldtab[i]->sval = fr; + fldtab[i]->tval = FLD | STR | DONTFREE; + while (*r != sep && *r != rtest && *r != '\0') /* \n is always a separator */ + *fr++ = *r++; + *fr++ = 0; + if (*r++ == 0) + break; + } + *fr = 0; + } + if (i > nfields) + FATAL("record `%.30s...' has too many fields; can't happen", r); + cleanfld(i+1, lastfld); /* clean out junk from previous record */ + lastfld = i; + donefld = 1; + for (j = 1; j <= lastfld; j++) { + p = fldtab[j]; + if(is_number(p->sval)) { + p->fval = atof(p->sval); + p->tval |= NUM; + } + } + setfval(nfloc, (Awkfloat) lastfld); + if (dbg) { + for (j = 0; j <= lastfld; j++) { + p = fldtab[j]; + printf("field %d (%s): |%s|\n", j, p->nval, p->sval); + } + } +} + +void cleanfld(int n1, int n2) /* clean out fields n1 .. n2 inclusive */ +{ /* nvals remain intact */ + Cell *p; + int i; + + for (i = n1; i <= n2; i++) { + p = fldtab[i]; + if (freeable(p)) + xfree(p->sval); + p->sval = ""; + p->tval = FLD | STR | DONTFREE; + } +} + +void newfld(int n) /* add field n after end of existing lastfld */ +{ + if (n > nfields) + growfldtab(n); + cleanfld(lastfld+1, n); + lastfld = n; + setfval(nfloc, (Awkfloat) n); +} + +Cell *fieldadr(int n) /* get nth field */ +{ + if (n < 0) + FATAL("trying to access out of range field %d", n); + if (n > nfields) /* fields after NF are empty */ + growfldtab(n); /* but does not increase NF */ + return(fldtab[n]); +} + +void growfldtab(int n) /* make new fields up to at least $n */ +{ + int nf = 2 * nfields; + size_t s; + + if (n > nf) + nf = n; + s = (nf+1) * (sizeof (struct Cell *)); /* freebsd: how much do we need? */ + if (s / sizeof(struct Cell *) - 1 == nf) /* didn't overflow */ + fldtab = (Cell **) realloc(fldtab, s); + else /* overflow sizeof int */ + xfree(fldtab); /* make it null */ + if (fldtab == NULL) + FATAL("out of space creating %d fields", nf); + makefields(nfields+1, nf); + nfields = nf; +} + +int refldbld(const char *rec, const char *fs) /* build fields from reg expr in FS */ +{ + /* this relies on having fields[] the same length as $0 */ + /* the fields are all stored in this one array with \0's */ + char *fr; + int i, tempstat, n; + fa *pfa; + + n = strlen(rec); + if (n > fieldssize) { + xfree(fields); + if ((fields = (char *) malloc(n+1)) == NULL) + FATAL("out of space for fields in refldbld %d", n); + fieldssize = n; + } + fr = fields; + *fr = '\0'; + if (*rec == '\0') + return 0; + pfa = makedfa(fs, 1); + dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) ); + tempstat = pfa->initstat; + for (i = 1; ; i++) { + if (i > nfields) + growfldtab(i); + if (freeable(fldtab[i])) + xfree(fldtab[i]->sval); + fldtab[i]->tval = FLD | STR | DONTFREE; + fldtab[i]->sval = fr; + dprintf( ("refldbld: i=%d\n", i) ); + if (nematch(pfa, rec)) { + pfa->initstat = 2; /* horrible coupling to b.c */ + dprintf( ("match %s (%d chars)\n", patbeg, patlen) ); + strncpy(fr, rec, patbeg-rec); + fr += patbeg - rec + 1; + *(fr-1) = '\0'; + rec = patbeg + patlen; + } else { + dprintf( ("no match %s\n", rec) ); + strlcpy(fr, rec, fields + fieldssize - fr); + pfa->initstat = tempstat; + break; + } + } + return i; +} + +void recbld(void) /* create $0 from $1..$NF if necessary */ +{ + int i; + char *r, *p; + + if (donerec == 1) + return; + r = record; + for (i = 1; i <= *NF; i++) { + p = getsval(fldtab[i]); + if (!adjbuf(&record, &recsize, 1+strlen(p)+r-record, recsize, &r, "recbld 1")) + FATAL("created $0 `%.30s...' too long", record); + while ((*r = *p++) != 0) + r++; + if (i < *NF) { + if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2")) + FATAL("created $0 `%.30s...' too long", record); + for (p = *OFS; (*r = *p++) != 0; ) + r++; + } + } + if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3")) + FATAL("built giant record `%.30s...'", record); + *r = '\0'; + dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]) ); + + if (freeable(fldtab[0])) + xfree(fldtab[0]->sval); + fldtab[0]->tval = REC | STR | DONTFREE; + fldtab[0]->sval = record; + + dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]) ); + dprintf( ("recbld = |%s|\n", record) ); + donerec = 1; +} + +int errorflag = 0; + +void yyerror(const char *s) +{ + SYNTAX("%s", s); +} + +void SYNTAX(const char *fmt, ...) +{ + extern char *cmdname, *curfname; + static int been_here = 0; + va_list varg; + + if (been_here++ > 2) + return; + fprintf(stderr, "%s: ", cmdname); + va_start(varg, fmt); + vfprintf(stderr, fmt, varg); + va_end(varg); + fprintf(stderr, " at source line %d", lineno); + if (curfname != NULL) + fprintf(stderr, " in function %s", curfname); + if (compile_time == 1 && cursource() != NULL) + fprintf(stderr, " source file %s", cursource()); + fprintf(stderr, "\n"); + errorflag = 2; + eprint(); +} + +void fpecatch(int sig) +{ + extern Node *curnode; + char buf[1024]; + + snprintf(buf, sizeof buf, "floating point exception\n"); + write(STDERR_FILENO, buf, strlen(buf)); + + if (compile_time != 2 && NR && *NR > 0) { + snprintf(buf, sizeof buf, " input record number %d", (int) (*FNR)); + write(STDERR_FILENO, buf, strlen(buf)); + + if (strcmp(*FILENAME, "-") != 0) { + snprintf(buf, sizeof buf, ", file %s", *FILENAME); + write(STDERR_FILENO, buf, strlen(buf)); + } + write(STDERR_FILENO, "\n", 1); + } + if (compile_time != 2 && curnode) { + snprintf(buf, sizeof buf, " source line number %d", curnode->lineno); + write(STDERR_FILENO, buf, strlen(buf)); + } else if (compile_time != 2 && lineno) { + snprintf(buf, sizeof buf, " source line number %d", lineno); + write(STDERR_FILENO, buf, strlen(buf)); + } + if (compile_time == 1 && cursource() != NULL) { + snprintf(buf, sizeof buf, " source file %s", cursource()); + write(STDERR_FILENO, buf, strlen(buf)); + } + write(STDERR_FILENO, "\n", 1); + if (dbg > 1) /* core dump if serious debugging on */ + abort(); + _exit(1); +} + +extern int bracecnt, brackcnt, parencnt; + +void bracecheck(void) +{ + int c; + static int beenhere = 0; + + if (beenhere++) + return; + while ((c = input()) != EOF && c != '\0') + bclass(c); + bcheck2(bracecnt, '{', '}'); + bcheck2(brackcnt, '[', ']'); + bcheck2(parencnt, '(', ')'); +} + +void bcheck2(int n, int c1, int c2) +{ + if (n == 1) + fprintf(stderr, "\tmissing %c\n", c2); + else if (n > 1) + fprintf(stderr, "\t%d missing %c's\n", n, c2); + else if (n == -1) + fprintf(stderr, "\textra %c\n", c2); + else if (n < -1) + fprintf(stderr, "\t%d extra %c's\n", -n, c2); +} + +void FATAL(const char *fmt, ...) +{ + extern char *cmdname; + va_list varg; + + fflush(stdout); + fprintf(stderr, "%s: ", cmdname); + va_start(varg, fmt); + vfprintf(stderr, fmt, varg); + va_end(varg); + error(); + if (dbg > 1) /* core dump if serious debugging on */ + abort(); + exit(2); +} + +void WARNING(const char *fmt, ...) +{ + extern char *cmdname; + va_list varg; + + fflush(stdout); + fprintf(stderr, "%s: ", cmdname); + va_start(varg, fmt); + vfprintf(stderr, fmt, varg); + va_end(varg); + error(); +} + +void error() +{ + extern Node *curnode; + + fprintf(stderr, "\n"); + if (compile_time != 2 && NR && *NR > 0) { + fprintf(stderr, " input record number %d", (int) (*FNR)); + if (strcmp(*FILENAME, "-") != 0) + fprintf(stderr, ", file %s", *FILENAME); + fprintf(stderr, "\n"); + } + if (compile_time != 2 && curnode) + fprintf(stderr, " source line number %d", curnode->lineno); + else if (compile_time != 2 && lineno) + fprintf(stderr, " source line number %d", lineno); + if (compile_time == 1 && cursource() != NULL) + fprintf(stderr, " source file %s", cursource()); + fprintf(stderr, "\n"); + eprint(); +} + +void eprint(void) /* try to print context around error */ +{ + char *p, *q; + int c; + static int been_here = 0; + extern char ebuf[], *ep; + + if (compile_time == 2 || compile_time == 0 || been_here++ > 0) + return; + p = ep - 1; + if (p > ebuf && *p == '\n') + p--; + for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--) + ; + while (*p == '\n') + p++; + fprintf(stderr, " context is\n\t"); + for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--) + ; + for ( ; p < q; p++) + if (*p) + putc(*p, stderr); + fprintf(stderr, " >>> "); + for ( ; p < ep; p++) + if (*p) + putc(*p, stderr); + fprintf(stderr, " <<< "); + if (*ep) + while ((c = input()) != '\n' && c != '\0' && c != EOF) { + putc(c, stderr); + bclass(c); + } + putc('\n', stderr); + ep = ebuf; +} + +void bclass(int c) +{ + switch (c) { + case '{': bracecnt++; break; + case '}': bracecnt--; break; + case '[': brackcnt++; break; + case ']': brackcnt--; break; + case '(': parencnt++; break; + case ')': parencnt--; break; + } +} + +double errcheck(double x, const char *s) +{ + + if (errno == EDOM) { + errno = 0; + WARNING("%s argument out of domain", s); + x = 1; + } else if (errno == ERANGE) { + errno = 0; + WARNING("%s result out of range", s); + x = 1; + } + return x; +} + +int isclvar(const char *s) /* is s of form var=something ? */ +{ + const char *os = s; + + if (!isalpha((uschar) *s) && *s != '_') + return 0; + for ( ; *s; s++) + if (!(isalnum((uschar) *s) || *s == '_')) + break; + return *s == '=' && s > os && *(s+1) != '='; +} + +/* strtod is supposed to be a proper test of what's a valid number */ +/* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */ +/* wrong: violates 4.10.1.4 of ansi C standard */ + +#include <math.h> +int is_number(const char *s) +{ + double r; + char *ep; + errno = 0; + r = strtod(s, &ep); + if (ep == s || r == HUGE_VAL || errno == ERANGE) + return 0; + while (*ep == ' ' || *ep == '\t' || *ep == '\n') + ep++; + if (*ep == '\0') + return 1; + else + return 0; +} diff --git a/awk/main.c b/awk/main.c @@ -0,0 +1,212 @@ +/* $OpenBSD: main.c,v 1.17 2011/09/28 19:27:18 millert Exp $ */ +/**************************************************************** +Copyright (C) Lucent Technologies 1997 +All Rights Reserved + +Permission to use, copy, modify, and distribute this software and +its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the name Lucent Technologies or any of +its entities not be used in advertising or publicity pertaining +to distribution of the software without specific, written prior +permission. + +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +****************************************************************/ + +const char *version = "version 20110810"; + +#define DEBUG +#include <stdio.h> +#include <ctype.h> +#include <locale.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include "awk.h" +#include "ytab.h" + +extern char **environ; +extern int nfields; +extern char *__progname; + +int dbg = 0; +Awkfloat srand_seed = 1; +char *cmdname; /* gets argv[0] for error messages */ +extern FILE *yyin; /* lex input file */ +char *lexprog; /* points to program argument if it exists */ +extern int errorflag; /* non-zero if any syntax errors; set by yyerror */ +int compile_time = 2; /* for error printing: */ + /* 2 = cmdline, 1 = compile, 0 = running */ + +#define MAX_PFILE 20 /* max number of -f's */ + +char *pfile[MAX_PFILE]; /* program filenames from -f's */ +int npfile = 0; /* number of filenames */ +int curpfile = 0; /* current filename */ + +int safe = 0; /* 1 => "safe" mode */ + +int main(int argc, char *argv[]) +{ + const char *fs = NULL; + + setlocale(LC_ALL, ""); + setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */ + cmdname = __progname; + if (argc == 1) { + fprintf(stderr, "usage: %s [-safe] [-V] [-d[n]] [-F fs] " + "[-v var=value] [prog | -f progfile]\n\tfile ...\n", + cmdname); + exit(1); + } + signal(SIGFPE, fpecatch); + + yyin = NULL; + symtab = makesymtab(NSYMTAB); + while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') { + if (strcmp(argv[1], "--") == 0) { /* explicit end of args */ + argc--; + argv++; + break; + } + switch (argv[1][1]) { + case 's': + if (strcmp(argv[1], "-safe") == 0) + safe = 1; + break; + case 'f': /* next argument is program filename */ + if (argv[1][2] != 0) { /* arg is -fsomething */ + if (npfile >= MAX_PFILE - 1) + FATAL("too many -f options"); + pfile[npfile++] = &argv[1][2]; + } else { /* arg is -f something */ + argc--; argv++; + if (argc <= 1) + FATAL("no program filename"); + if (npfile >= MAX_PFILE - 1) + FATAL("too many -f options"); + pfile[npfile++] = argv[1]; + } + break; + case 'F': /* set field separator */ + if (argv[1][2] != 0) { /* arg is -Fsomething */ + if (argv[1][2] == 't' && argv[1][3] == 0) /* wart: t=>\t */ + fs = "\t"; + else if (argv[1][2] != 0) + fs = &argv[1][2]; + } else { /* arg is -F something */ + argc--; argv++; + if (argc > 1 && argv[1][0] == 't' && argv[1][1] == 0) /* wart: t=>\t */ + fs = "\t"; + else if (argc > 1 && argv[1][0] != 0) + fs = &argv[1][0]; + } + if (fs == NULL || *fs == '\0') + WARNING("field separator FS is empty"); + break; + case 'v': /* -v a=1 to be done NOW. one -v for each */ + if (argv[1][2] != 0) { /* arg is -vsomething */ + if (isclvar(&argv[1][2])) + setclvar(&argv[1][2]); + else + FATAL("invalid -v option argument: %s", &argv[1][2]); + } else { /* arg is -v something */ + argc--; argv++; + if (argc <= 1) + FATAL("no variable name"); + if (isclvar(argv[1])) + setclvar(argv[1]); + else + FATAL("invalid -v option argument: %s", argv[1]); + } + break; + case 'd': + dbg = atoi(&argv[1][2]); + if (dbg == 0) + dbg = 1; + printf("awk %s\n", version); + break; + case 'V': /* added for exptools "standard" */ + printf("awk %s\n", version); + exit(0); + break; + default: + WARNING("unknown option %s ignored", argv[1]); + break; + } + argc--; + argv++; + } + /* argv[1] is now the first argument */ + if (npfile == 0) { /* no -f; first argument is program */ + if (argc <= 1) { + if (dbg) + exit(0); + FATAL("no program given"); + } + dprintf( ("program = |%s|\n", argv[1]) ); + lexprog = argv[1]; + argc--; + argv++; + } + recinit(recsize); + syminit(); + compile_time = 1; + argv[0] = cmdname; /* put prog name at front of arglist */ + dprintf( ("argc=%d, argv[0]=%s\n", argc, argv[0]) ); + arginit(argc, argv); + if (!safe) + envinit(environ); + yyparse(); + setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */ + if (fs) + *FS = qstring(fs, '\0'); + dprintf( ("errorflag=%d\n", errorflag) ); + if (errorflag == 0) { + compile_time = 0; + run(winner); + } else + bracecheck(); + return(errorflag); +} + +int pgetc(void) /* get 1 character from awk program */ +{ + int c; + + for (;;) { + if (yyin == NULL) { + if (curpfile >= npfile) + return EOF; + if (strcmp(pfile[curpfile], "-") == 0) + yyin = stdin; + else if ((yyin = fopen(pfile[curpfile], "r")) == NULL) + FATAL("can't open file %s", pfile[curpfile]); + lineno = 1; + } + if ((c = getc(yyin)) != EOF) + return c; + if (yyin != stdin) + fclose(yyin); + yyin = NULL; + curpfile++; + } +} + +char *cursource(void) /* current source file name */ +{ + if (npfile > 0) + return pfile[curpfile]; + else + return NULL; +} diff --git a/awk/maketab.c b/awk/maketab.c @@ -0,0 +1,172 @@ +/* $OpenBSD: maketab.c,v 1.11 2010/06/13 17:58:19 millert Exp $ */ +/**************************************************************** +Copyright (C) Lucent Technologies 1997 +All Rights Reserved + +Permission to use, copy, modify, and distribute this software and +its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the name Lucent Technologies or any of +its entities not be used in advertising or publicity pertaining +to distribution of the software without specific, written prior +permission. + +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +****************************************************************/ + +/* + * this program makes the table to link function names + * and type indices that is used by execute() in run.c. + * it finds the indices in ytab.h, produced by yacc. + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include "awk.h" +#include "ytab.h" + +struct xx +{ int token; + const char *name; + const char *pname; +} proc[] = { + { PROGRAM, "program", NULL }, + { BOR, "boolop", " || " }, + { AND, "boolop", " && " }, + { NOT, "boolop", " !" }, + { NE, "relop", " != " }, + { EQ, "relop", " == " }, + { LE, "relop", " <= " }, + { LT, "relop", " < " }, + { GE, "relop", " >= " }, + { GT, "relop", " > " }, + { ARRAY, "array", NULL }, + { INDIRECT, "indirect", "$(" }, + { SUBSTR, "substr", "substr" }, + { SUB, "sub", "sub" }, + { GSUB, "gsub", "gsub" }, + { INDEX, "sindex", "sindex" }, + { SPRINTF, "awksprintf", "sprintf " }, + { ADD, "arith", " + " }, + { MINUS, "arith", " - " }, + { MULT, "arith", " * " }, + { DIVIDE, "arith", " / " }, + { MOD, "arith", " % " }, + { UMINUS, "arith", " -" }, + { POWER, "arith", " **" }, + { PREINCR, "incrdecr", "++" }, + { POSTINCR, "incrdecr", "++" }, + { PREDECR, "incrdecr", "--" }, + { POSTDECR, "incrdecr", "--" }, + { CAT, "cat", " " }, + { PASTAT, "pastat", NULL }, + { PASTAT2, "dopa2", NULL }, + { MATCH, "matchop", " ~ " }, + { NOTMATCH, "matchop", " !~ " }, + { MATCHFCN, "matchop", "matchop" }, + { INTEST, "intest", "intest" }, + { PRINTF, "awkprintf", "printf" }, + { PRINT, "printstat", "print" }, + { CLOSE, "closefile", "closefile" }, + { DELETE, "awkdelete", "awkdelete" }, + { SPLIT, "split", "split" }, + { ASSIGN, "assign", " = " }, + { ADDEQ, "assign", " += " }, + { SUBEQ, "assign", " -= " }, + { MULTEQ, "assign", " *= " }, + { DIVEQ, "assign", " /= " }, + { MODEQ, "assign", " %= " }, + { POWEQ, "assign", " ^= " }, + { CONDEXPR, "condexpr", " ?: " }, + { IF, "ifstat", "if(" }, + { WHILE, "whilestat", "while(" }, + { FOR, "forstat", "for(" }, + { DO, "dostat", "do" }, + { IN, "instat", "instat" }, + { NEXT, "jump", "next" }, + { NEXTFILE, "jump", "nextfile" }, + { EXIT, "jump", "exit" }, + { BREAK, "jump", "break" }, + { CONTINUE, "jump", "continue" }, + { RETURN, "jump", "ret" }, + { BLTIN, "bltin", "bltin" }, + { CALL, "call", "call" }, + { ARG, "arg", "arg" }, + { VARNF, "getnf", "NF" }, + { GETLINE, "awkgetline", "getline" }, + { 0, "", "" }, +}; + +#define SIZE (LASTTOKEN - FIRSTTOKEN + 1) +const char *table[SIZE]; +char *names[SIZE]; + +int main(int argc, char *argv[]) +{ + const struct xx *p; + int i, n, tok; + char c; + FILE *fp; + char buf[200], name[200], def[200]; + + printf("#include <stdio.h>\n"); + printf("#include \"awk.h\"\n"); + printf("#include \"ytab.h\"\n\n"); + for (i = SIZE; --i >= 0; ) + names[i] = ""; + + if ((fp = fopen("ytab.h", "r")) == NULL) { + fprintf(stderr, "maketab: can't open ytab.h!\n"); + exit(1); + } + printf("static char *printname[%d] = {\n", SIZE); + i = 0; + while (fgets(buf, sizeof buf, fp) != NULL) { + n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok); + if (n != 4 || c != '#' || strcmp(def, "define") != 0) + continue; /* not a valid #define */ + if (tok < FIRSTTOKEN || tok > LASTTOKEN) { + /* fprintf(stderr, "maketab: funny token %d %s ignored\n", tok, buf); */ + continue; + } + names[tok-FIRSTTOKEN] = (char *) strdup(name); + if (names[tok-FIRSTTOKEN] == NULL) { + fprintf(stderr, "maketab: out of memory\n"); + exit(1); + } + printf("\t(char *) \"%s\",\t/* %d */\n", name, tok); + i++; + } + printf("};\n\n"); + + for (p=proc; p->token!=0; p++) + table[p->token-FIRSTTOKEN] = p->name; + printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE); + for (i=0; i<SIZE; i++) + if (table[i]==0) + printf("\tnullproc,\t/* %s */\n", names[i]); + else + printf("\t%s,\t/* %s */\n", table[i], names[i]); + printf("};\n\n"); + + printf("char *tokname(int n)\n"); /* print a tokname() function */ + printf("{\n"); + printf(" static char buf[100];\n\n"); + printf(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n"); + printf(" snprintf(buf, sizeof buf, \"token %%d\", n);\n"); + printf(" return buf;\n"); + printf(" }\n"); + printf(" return printname[n-FIRSTTOKEN];\n"); + printf("}\n"); + return 0; +} diff --git a/awk/parse.c b/awk/parse.c @@ -0,0 +1,277 @@ +/* $OpenBSD: parse.c,v 1.6 2002/12/19 21:24:28 millert Exp $ */ +/**************************************************************** +Copyright (C) Lucent Technologies 1997 +All Rights Reserved + +Permission to use, copy, modify, and distribute this software and +its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the name Lucent Technologies or any of +its entities not be used in advertising or publicity pertaining +to distribution of the software without specific, written prior +permission. + +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +****************************************************************/ + +#define DEBUG +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include "awk.h" +#include "ytab.h" + +Node *nodealloc(int n) +{ + Node *x; + + x = (Node *) malloc(sizeof(Node) + (n-1)*sizeof(Node *)); + if (x == NULL) + FATAL("out of space in nodealloc"); + x->nnext = NULL; + x->lineno = lineno; + return(x); +} + +Node *exptostat(Node *a) +{ + a->ntype = NSTAT; + return(a); +} + +Node *node1(int a, Node *b) +{ + Node *x; + + x = nodealloc(1); + x->nobj = a; + x->narg[0]=b; + return(x); +} + +Node *node2(int a, Node *b, Node *c) +{ + Node *x; + + x = nodealloc(2); + x->nobj = a; + x->narg[0] = b; + x->narg[1] = c; + return(x); +} + +Node *node3(int a, Node *b, Node *c, Node *d) +{ + Node *x; + + x = nodealloc(3); + x->nobj = a; + x->narg[0] = b; + x->narg[1] = c; + x->narg[2] = d; + return(x); +} + +Node *node4(int a, Node *b, Node *c, Node *d, Node *e) +{ + Node *x; + + x = nodealloc(4); + x->nobj = a; + x->narg[0] = b; + x->narg[1] = c; + x->narg[2] = d; + x->narg[3] = e; + return(x); +} + +Node *stat1(int a, Node *b) +{ + Node *x; + + x = node1(a,b); + x->ntype = NSTAT; + return(x); +} + +Node *stat2(int a, Node *b, Node *c) +{ + Node *x; + + x = node2(a,b,c); + x->ntype = NSTAT; + return(x); +} + +Node *stat3(int a, Node *b, Node *c, Node *d) +{ + Node *x; + + x = node3(a,b,c,d); + x->ntype = NSTAT; + return(x); +} + +Node *stat4(int a, Node *b, Node *c, Node *d, Node *e) +{ + Node *x; + + x = node4(a,b,c,d,e); + x->ntype = NSTAT; + return(x); +} + +Node *op1(int a, Node *b) +{ + Node *x; + + x = node1(a,b); + x->ntype = NEXPR; + return(x); +} + +Node *op2(int a, Node *b, Node *c) +{ + Node *x; + + x = node2(a,b,c); + x->ntype = NEXPR; + return(x); +} + +Node *op3(int a, Node *b, Node *c, Node *d) +{ + Node *x; + + x = node3(a,b,c,d); + x->ntype = NEXPR; + return(x); +} + +Node *op4(int a, Node *b, Node *c, Node *d, Node *e) +{ + Node *x; + + x = node4(a,b,c,d,e); + x->ntype = NEXPR; + return(x); +} + +Node *celltonode(Cell *a, int b) +{ + Node *x; + + a->ctype = OCELL; + a->csub = b; + x = node1(0, (Node *) a); + x->ntype = NVALUE; + return(x); +} + +Node *rectonode(void) /* make $0 into a Node */ +{ + extern Cell *literal0; + return op1(INDIRECT, celltonode(literal0, CUNK)); +} + +Node *makearr(Node *p) +{ + Cell *cp; + + if (isvalue(p)) { + cp = (Cell *) (p->narg[0]); + if (isfcn(cp)) + SYNTAX( "%s is a function, not an array", cp->nval ); + else if (!isarr(cp)) { + xfree(cp->sval); + cp->sval = (char *) makesymtab(NSYMTAB); + cp->tval = ARR; + } + } + return p; +} + +#define PA2NUM 50 /* max number of pat,pat patterns allowed */ +int paircnt; /* number of them in use */ +int pairstack[PA2NUM]; /* state of each pat,pat */ + +Node *pa2stat(Node *a, Node *b, Node *c) /* pat, pat {...} */ +{ + Node *x; + + x = node4(PASTAT2, a, b, c, itonp(paircnt)); + if (paircnt++ >= PA2NUM) + SYNTAX( "limited to %d pat,pat statements", PA2NUM ); + x->ntype = NSTAT; + return(x); +} + +Node *linkum(Node *a, Node *b) +{ + Node *c; + + if (errorflag) /* don't link things that are wrong */ + return a; + if (a == NULL) + return(b); + else if (b == NULL) + return(a); + for (c = a; c->nnext != NULL; c = c->nnext) + ; + c->nnext = b; + return(a); +} + +void defn(Cell *v, Node *vl, Node *st) /* turn on FCN bit in definition, */ +{ /* body of function, arglist */ + Node *p; + int n; + + if (isarr(v)) { + SYNTAX( "`%s' is an array name and a function name", v->nval ); + return; + } + if (isarg(v->nval) != -1) { + SYNTAX( "`%s' is both function name and argument name", v->nval ); + return; + } + + v->tval = FCN; + v->sval = (char *) st; + n = 0; /* count arguments */ + for (p = vl; p; p = p->nnext) + n++; + v->fval = n; + dprintf( ("defining func %s (%d args)\n", v->nval, n) ); +} + +int isarg(const char *s) /* is s in argument list for current function? */ +{ /* return -1 if not, otherwise arg # */ + extern Node *arglist; + Node *p = arglist; + int n; + + for (n = 0; p != 0; p = p->nnext, n++) + if (strcmp(((Cell *)(p->narg[0]))->nval, s) == 0) + return n; + return -1; +} + +int ptoi(void *p) /* convert pointer to integer */ +{ + return (int) (long) p; /* swearing that p fits, of course */ +} + +Node *itonp(int i) /* and vice versa */ +{ + return (Node *) (long) i; +} diff --git a/awk/proto.h b/awk/proto.h @@ -0,0 +1,196 @@ +/* $OpenBSD: proto.h,v 1.9 2011/09/28 19:27:18 millert Exp $ */ +/**************************************************************** +Copyright (C) Lucent Technologies 1997 +All Rights Reserved + +Permission to use, copy, modify, and distribute this software and +its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the name Lucent Technologies or any of +its entities not be used in advertising or publicity pertaining +to distribution of the software without specific, written prior +permission. + +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +****************************************************************/ + +extern int yywrap(void); +extern void setfname(Cell *); +extern int constnode(Node *); +extern char *strnode(Node *); +extern Node *notnull(Node *); +extern int yyparse(void); + +extern int yylex(void); +extern void startreg(void); +extern int input(void); +extern void unput(int); +extern void unputstr(const char *); +extern int yylook(void); +extern int yyback(int *, int); +extern int yyinput(void); + +extern fa *makedfa(const char *, int); +extern fa *mkdfa(const char *, int); +extern int makeinit(fa *, int); +extern void penter(Node *); +extern void freetr(Node *); +extern int hexstr(uschar **); +extern int quoted(uschar **); +extern char *cclenter(const char *); +extern void overflo(const char *); +extern void cfoll(fa *, Node *); +extern int first(Node *); +extern void follow(Node *); +extern int member(int, const char *); +extern int match(fa *, const char *); +extern int pmatch(fa *, const char *); +extern int nematch(fa *, const char *); +extern Node *reparse(const char *); +extern Node *regexp(void); +extern Node *primary(void); +extern Node *concat(Node *); +extern Node *alt(Node *); +extern Node *unary(Node *); +extern int relex(void); +extern int cgoto(fa *, int, int); +extern void freefa(fa *); + +extern int pgetc(void); +extern char *cursource(void); + +extern Node *nodealloc(int); +extern Node *exptostat(Node *); +extern Node *node1(int, Node *); +extern Node *node2(int, Node *, Node *); +extern Node *node3(int, Node *, Node *, Node *); +extern Node *node4(int, Node *, Node *, Node *, Node *); +extern Node *stat3(int, Node *, Node *, Node *); +extern Node *op2(int, Node *, Node *); +extern Node *op1(int, Node *); +extern Node *stat1(int, Node *); +extern Node *op3(int, Node *, Node *, Node *); +extern Node *op4(int, Node *, Node *, Node *, Node *); +extern Node *stat2(int, Node *, Node *); +extern Node *stat4(int, Node *, Node *, Node *, Node *); +extern Node *celltonode(Cell *, int); +extern Node *rectonode(void); +extern Node *makearr(Node *); +extern Node *pa2stat(Node *, Node *, Node *); +extern Node *linkum(Node *, Node *); +extern void defn(Cell *, Node *, Node *); +extern int isarg(const char *); +extern char *tokname(int); +extern Cell *(*proctab[])(Node **, int); +extern int ptoi(void *); +extern Node *itonp(int); + +extern void syminit(void); +extern void arginit(int, char **); +extern void envinit(char **); +extern Array *makesymtab(int); +extern void freesymtab(Cell *); +extern void freeelem(Cell *, const char *); +extern Cell *setsymtab(const char *, const char *, double, unsigned int, Array *); +extern int hash(const char *, int); +extern void rehash(Array *); +extern Cell *lookup(const char *, Array *); +extern double setfval(Cell *, double); +extern void funnyvar(Cell *, const char *); +extern char *setsval(Cell *, const char *); +extern double getfval(Cell *); +extern char *getsval(Cell *); +extern char *getpssval(Cell *); /* for print */ +extern char *tostring(const char *); +extern char *qstring(const char *, int); + +extern void recinit(unsigned int); +extern void initgetrec(void); +extern void makefields(int, int); +extern void growfldtab(int n); +extern int getrec(char **, int *, int); +extern void nextfile(void); +extern int readrec(char **buf, int *bufsize, FILE *inf); +extern char *getargv(int); +extern void setclvar(char *); +extern void fldbld(void); +extern void cleanfld(int, int); +extern void newfld(int); +extern int refldbld(const char *, const char *); +extern void recbld(void); +extern Cell *fieldadr(int); +extern void yyerror(const char *); +extern void fpecatch(int); +extern void bracecheck(void); +extern void bcheck2(int, int, int); +extern void SYNTAX(const char *, ...); +extern void FATAL(const char *, ...); +extern void WARNING(const char *, ...); +extern void error(void); +extern void eprint(void); +extern void bclass(int); +extern double errcheck(double, const char *); +extern int isclvar(const char *); +extern int is_number(const char *); + +extern int adjbuf(char **pb, int *sz, int min, int q, char **pbp, const char *what); +extern void run(Node *); +extern Cell *execute(Node *); +extern Cell *program(Node **, int); +extern Cell *call(Node **, int); +extern Cell *copycell(Cell *); +extern Cell *arg(Node **, int); +extern Cell *jump(Node **, int); +extern Cell *awkgetline(Node **, int); +extern Cell *getnf(Node **, int); +extern Cell *array(Node **, int); +extern Cell *awkdelete(Node **, int); +extern Cell *intest(Node **, int); +extern Cell *matchop(Node **, int); +extern Cell *boolop(Node **, int); +extern Cell *relop(Node **, int); +extern void tfree(Cell *); +extern Cell *gettemp(void); +extern Cell *field(Node **, int); +extern Cell *indirect(Node **, int); +extern Cell *substr(Node **, int); +extern Cell *sindex(Node **, int); +extern int format(char **, int *, const char *, Node *); +extern Cell *awksprintf(Node **, int); +extern Cell *awkprintf(Node **, int); +extern Cell *arith(Node **, int); +extern double ipow(double, int); +extern Cell *incrdecr(Node **, int); +extern Cell *assign(Node **, int); +extern Cell *cat(Node **, int); +extern Cell *pastat(Node **, int); +extern Cell *dopa2(Node **, int); +extern Cell *split(Node **, int); +extern Cell *condexpr(Node **, int); +extern Cell *ifstat(Node **, int); +extern Cell *whilestat(Node **, int); +extern Cell *dostat(Node **, int); +extern Cell *forstat(Node **, int); +extern Cell *instat(Node **, int); +extern Cell *bltin(Node **, int); +extern Cell *printstat(Node **, int); +extern Cell *nullproc(Node **, int); +extern FILE *redirect(int, Node *); +extern FILE *openfile(int, const char *); +extern const char *filename(FILE *); +extern Cell *closefile(Node **, int); +extern void closeall(void); +extern Cell *sub(Node **, int); +extern Cell *gsub(Node **, int); + +extern FILE *popen(const char *, const char *); +extern int pclose(FILE *); diff --git a/awk/reallocarray.c b/awk/reallocarray.c @@ -0,0 +1,38 @@ +/* $OpenBSD: reallocarray.c,v 1.1 2014/05/08 21:43:49 deraadt Exp $ */ +/* + * Copyright (c) 2008 Otto Moerbeek <otto@drijf.net> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> + +/* + * This is sqrt(SIZE_MAX+1), as s1*s2 <= SIZE_MAX + * if both s1 < MUL_NO_OVERFLOW and s2 < MUL_NO_OVERFLOW + */ +#define MUL_NO_OVERFLOW (1UL << (sizeof(size_t) * 4)) + +void * +reallocarray(void *optr, size_t nmemb, size_t size) +{ + if ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && + nmemb > 0 && SIZE_MAX / nmemb < size) { + errno = ENOMEM; + return NULL; + } + return realloc(optr, size * nmemb); +} diff --git a/awk/run.c b/awk/run.c @@ -0,0 +1,2027 @@ +/* $OpenBSD: run.c,v 1.35 2014/10/11 03:07:29 doug Exp $ */ +/**************************************************************** +Copyright (C) Lucent Technologies 1997 +All Rights Reserved + +Permission to use, copy, modify, and distribute this software and +its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the name Lucent Technologies or any of +its entities not be used in advertising or publicity pertaining +to distribution of the software without specific, written prior +permission. + +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +****************************************************************/ + +#define DEBUG +#include <stdio.h> +#include <ctype.h> +#include <setjmp.h> +#include <limits.h> +#include <math.h> +#include <string.h> +#include <stdlib.h> +#include <time.h> +#include "awk.h" +#include "util.h" +#include "ytab.h" + +#define tempfree(x) if (istemp(x)) tfree(x); else + +/* +#undef tempfree + +void tempfree(Cell *p) { + if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { + WARNING("bad csub %d in Cell %d %s", + p->csub, p->ctype, p->sval); + } + if (istemp(p)) + tfree(p); +} +*/ + +/* do we really need these? */ +/* #ifdef _NFILE */ +/* #ifndef FOPEN_MAX */ +/* #define FOPEN_MAX _NFILE */ +/* #endif */ +/* #endif */ +/* */ +/* #ifndef FOPEN_MAX */ +/* #define FOPEN_MAX 40 */ /* max number of open files */ +/* #endif */ +/* */ +/* #ifndef RAND_MAX */ +/* #define RAND_MAX 32767 */ /* all that ansi guarantees */ +/* #endif */ + +jmp_buf env; +int use_srandom; +extern int pairstack[]; +extern Awkfloat srand_seed; + +Node *winner = NULL; /* root of parse tree */ +Cell *tmps; /* free temporary cells for execution */ + +static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM }; +Cell *True = &truecell; +static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM }; +Cell *False = &falsecell; +static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM }; +Cell *jbreak = &breakcell; +static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM }; +Cell *jcont = &contcell; +static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM }; +Cell *jnext = &nextcell; +static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM }; +Cell *jnextfile = &nextfilecell; +static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM }; +Cell *jexit = &exitcell; +static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM }; +Cell *jret = &retcell; +static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE }; + +Node *curnode = NULL; /* the node being executed, for debugging */ + +void stdinit(void); +void flush_all(void); + +/* buffer memory management */ +int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, + const char *whatrtn) +/* pbuf: address of pointer to buffer being managed + * psiz: address of buffer size variable + * minlen: minimum length of buffer needed + * quantum: buffer size quantum + * pbptr: address of movable pointer into buffer, or 0 if none + * whatrtn: name of the calling routine if failure should cause fatal error + * + * return 0 for realloc failure, !=0 for success + */ +{ + if (minlen > *psiz) { + char *tbuf; + int rminlen = quantum ? minlen % quantum : 0; + int boff = pbptr ? *pbptr - *pbuf : 0; + /* round up to next multiple of quantum */ + if (rminlen) + minlen += quantum - rminlen; + tbuf = (char *) realloc(*pbuf, minlen); + dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf) ); + if (tbuf == NULL) { + if (whatrtn) + FATAL("out of memory in %s", whatrtn); + return 0; + } + *pbuf = tbuf; + *psiz = minlen; + if (pbptr) + *pbptr = tbuf + boff; + } + return 1; +} + +void run(Node *a) /* execution of parse tree starts here */ +{ + stdinit(); + execute(a); + closeall(); +} + +Cell *execute(Node *u) /* execute a node of the parse tree */ +{ + Cell *(*proc)(Node **, int); + Cell *x; + Node *a; + + if (u == NULL) + return(True); + for (a = u; ; a = a->nnext) { + curnode = a; + if (isvalue(a)) { + x = (Cell *) (a->narg[0]); + if (isfld(x) && !donefld) + fldbld(); + else if (isrec(x) && !donerec) + recbld(); + return(x); + } + if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ + FATAL("illegal statement"); + proc = proctab[a->nobj-FIRSTTOKEN]; + x = (*proc)(a->narg, a->nobj); + if (isfld(x) && !donefld) + fldbld(); + else if (isrec(x) && !donerec) + recbld(); + if (isexpr(a)) + return(x); + if (isjump(x)) + return(x); + if (a->nnext == NULL) + return(x); + tempfree(x); + } +} + + +Cell *program(Node **a, int n) /* execute an awk program */ +{ /* a[0] = BEGIN, a[1] = body, a[2] = END */ + Cell *x; + + if (setjmp(env) != 0) + goto ex; + if (a[0]) { /* BEGIN */ + x = execute(a[0]); + if (isexit(x)) + return(True); + if (isjump(x)) + FATAL("illegal break, continue, next or nextfile from BEGIN"); + tempfree(x); + } + if (a[1] || a[2]) + while (getrec(&record, &recsize, 1) > 0) { + x = execute(a[1]); + if (isexit(x)) + break; + tempfree(x); + } + ex: + if (setjmp(env) != 0) /* handles exit within END */ + goto ex1; + if (a[2]) { /* END */ + x = execute(a[2]); + if (isbreak(x) || isnext(x) || iscont(x)) + FATAL("illegal break, continue, next or nextfile from END"); + tempfree(x); + } + ex1: + return(True); +} + +struct Frame { /* stack frame for awk function calls */ + int nargs; /* number of arguments in this call */ + Cell *fcncell; /* pointer to Cell for function */ + Cell **args; /* pointer to array of arguments after execute */ + Cell *retval; /* return value */ +}; + +#define NARGS 50 /* max args in a call */ + +struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ +int nframe = 0; /* number of frames allocated */ +struct Frame *fp = NULL; /* frame pointer. bottom level unused */ + +Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ +{ + static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE }; + int i, ncall, ndef; + int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ + Node *x; + Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ + Cell *y, *z, *fcn; + char *s; + + fcn = execute(a[0]); /* the function itself */ + s = fcn->nval; + if (!isfcn(fcn)) + FATAL("calling undefined function %s", s); + if (frame == NULL) { + fp = frame = (struct Frame *) calloc(nframe += 100, sizeof(struct Frame)); + if (frame == NULL) + FATAL("out of space for stack frames calling %s", s); + } + for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ + ncall++; + ndef = (int) fcn->fval; /* args in defn */ + dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) ); + if (ncall > ndef) + WARNING("function %s called with %d args, uses only %d", + s, ncall, ndef); + if (ncall + ndef > NARGS) + FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); + for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ + dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) ); + y = execute(x); + oargs[i] = y; + dprintf( ("args[%d]: %s %f <%s>, t=%o\n", + i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval) ); + if (isfcn(y)) + FATAL("can't use function %s as argument in %s", y->nval, s); + if (isarr(y)) + args[i] = y; /* arrays by ref */ + else + args[i] = copycell(y); + tempfree(y); + } + for ( ; i < ndef; i++) { /* add null args for ones not provided */ + args[i] = gettemp(); + *args[i] = newcopycell; + } + fp++; /* now ok to up frame */ + if (fp >= frame + nframe) { + int dfp = fp - frame; /* old index */ + frame = (struct Frame *) + realloc((char *) frame, (nframe += 100) * sizeof(struct Frame)); + if (frame == NULL) + FATAL("out of space for stack frames in %s", s); + fp = frame + dfp; + } + fp->fcncell = fcn; + fp->args = args; + fp->nargs = ndef; /* number defined with (excess are locals) */ + fp->retval = gettemp(); + + dprintf( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) ); + y = execute((Node *)(fcn->sval)); /* execute body */ + dprintf( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) ); + + for (i = 0; i < ndef; i++) { + Cell *t = fp->args[i]; + if (isarr(t)) { + if (t->csub == CCOPY) { + if (i >= ncall) { + freesymtab(t); + t->csub = CTEMP; + tempfree(t); + } else { + oargs[i]->tval = t->tval; + oargs[i]->tval &= ~(STR|NUM|DONTFREE); + oargs[i]->sval = t->sval; + tempfree(t); + } + } + } else if (t != y) { /* kludge to prevent freeing twice */ + t->csub = CTEMP; + tempfree(t); + } else if (t == y && t->csub == CCOPY) { + t->csub = CTEMP; + tempfree(t); + freed = 1; + } + } + tempfree(fcn); + if (isexit(y) || isnext(y)) + return y; + if (freed == 0) { + tempfree(y); /* don't free twice! */ + } + z = fp->retval; /* return value */ + dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) ); + fp--; + return(z); +} + +Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ +{ + Cell *y; + + y = gettemp(); + y->csub = CCOPY; /* prevents freeing until call is over */ + y->nval = x->nval; /* BUG? */ + if (isstr(x)) + y->sval = tostring(x->sval); + y->fval = x->fval; + y->tval = x->tval & ~(CON|FLD|REC|DONTFREE); /* copy is not constant or field */ + /* is DONTFREE right? */ + return y; +} + +Cell *arg(Node **a, int n) /* nth argument of a function */ +{ + + n = ptoi(a[0]); /* argument number, counting from 0 */ + dprintf( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) ); + if (n+1 > fp->nargs) + FATAL("argument #%d of function %s was not supplied", + n+1, fp->fcncell->nval); + return fp->args[n]; +} + +Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ +{ + Cell *y; + + switch (n) { + case EXIT: + if (a[0] != NULL) { + y = execute(a[0]); + errorflag = (int) getfval(y); + tempfree(y); + } + longjmp(env, 1); + case RETURN: + if (a[0] != NULL) { + y = execute(a[0]); + if ((y->tval & (STR|NUM)) == (STR|NUM)) { + setsval(fp->retval, getsval(y)); + fp->retval->fval = getfval(y); + fp->retval->tval |= NUM; + } + else if (y->tval & STR) + setsval(fp->retval, getsval(y)); + else if (y->tval & NUM) + setfval(fp->retval, getfval(y)); + else /* can't happen */ + FATAL("bad type variable %d", y->tval); + tempfree(y); + } + return(jret); + case NEXT: + return(jnext); + case NEXTFILE: + nextfile(); + return(jnextfile); + case BREAK: + return(jbreak); + case CONTINUE: + return(jcont); + default: /* can't happen */ + FATAL("illegal jump type %d", n); + } + return 0; /* not reached */ +} + +Cell *awkgetline(Node **a, int n) /* get next line from specific input */ +{ /* a[0] is variable, a[1] is operator, a[2] is filename */ + Cell *r, *x; + extern Cell **fldtab; + FILE *fp; + char *buf; + int bufsize = recsize; + int mode; + + if ((buf = (char *) malloc(bufsize)) == NULL) + FATAL("out of memory in getline"); + + fflush(stdout); /* in case someone is waiting for a prompt */ + r = gettemp(); + if (a[1] != NULL) { /* getline < file */ + x = execute(a[2]); /* filename */ + mode = ptoi(a[1]); + if (mode == '|') /* input pipe */ + mode = LE; /* arbitrary flag */ + fp = openfile(mode, getsval(x)); + tempfree(x); + if (fp == NULL) + n = -1; + else + n = readrec(&buf, &bufsize, fp); + if (n <= 0) { + ; + } else if (a[0] != NULL) { /* getline var <file */ + x = execute(a[0]); + setsval(x, buf); + tempfree(x); + } else { /* getline <file */ + setsval(fldtab[0], buf); + if (is_number(fldtab[0]->sval)) { + fldtab[0]->fval = atof(fldtab[0]->sval); + fldtab[0]->tval |= NUM; + } + } + } else { /* bare getline; use current input */ + if (a[0] == NULL) /* getline */ + n = getrec(&record, &recsize, 1); + else { /* getline var */ + n = getrec(&buf, &bufsize, 0); + x = execute(a[0]); + setsval(x, buf); + tempfree(x); + } + } + setfval(r, (Awkfloat) n); + free(buf); + return r; +} + +Cell *getnf(Node **a, int n) /* get NF */ +{ + if (donefld == 0) + fldbld(); + return (Cell *) a[0]; +} + +Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ +{ + Cell *x, *y, *z; + char *s; + Node *np; + char *buf; + int bufsz = recsize; + int nsub = strlen(*SUBSEP); + + if ((buf = (char *) malloc(bufsz)) == NULL) + FATAL("out of memory in array"); + + x = execute(a[0]); /* Cell* for symbol table */ + buf[0] = 0; + for (np = a[1]; np; np = np->nnext) { + y = execute(np); /* subscript */ + s = getsval(y); + if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array")) + FATAL("out of memory for %s[%s...]", x->nval, buf); + strlcat(buf, s, bufsz); + if (np->nnext) + strlcat(buf, *SUBSEP, bufsz); + tempfree(y); + } + if (!isarr(x)) { + dprintf( ("making %s into an array\n", NN(x->nval)) ); + if (freeable(x)) + xfree(x->sval); + x->tval &= ~(STR|NUM|DONTFREE); + x->tval |= ARR; + x->sval = (char *) makesymtab(NSYMTAB); + } + z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); + z->ctype = OCELL; + z->csub = CVAR; + tempfree(x); + free(buf); + return(z); +} + +Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ +{ + Cell *x, *y; + Node *np; + char *s; + int nsub = strlen(*SUBSEP); + + x = execute(a[0]); /* Cell* for symbol table */ + if (!isarr(x)) + return True; + if (a[1] == 0) { /* delete the elements, not the table */ + freesymtab(x); + x->tval &= ~STR; + x->tval |= ARR; + x->sval = (char *) makesymtab(NSYMTAB); + } else { + int bufsz = recsize; + char *buf; + if ((buf = (char *) malloc(bufsz)) == NULL) + FATAL("out of memory in adelete"); + buf[0] = 0; + for (np = a[1]; np; np = np->nnext) { + y = execute(np); /* subscript */ + s = getsval(y); + if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete")) + FATAL("out of memory deleting %s[%s...]", x->nval, buf); + strlcat(buf, s, bufsz); + if (np->nnext) + strlcat(buf, *SUBSEP, bufsz); + tempfree(y); + } + freeelem(x, buf); + free(buf); + } + tempfree(x); + return True; +} + +Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ +{ + Cell *x, *ap, *k; + Node *p; + char *buf; + char *s; + int bufsz = recsize; + int nsub = strlen(*SUBSEP); + + ap = execute(a[1]); /* array name */ + if (!isarr(ap)) { + dprintf( ("making %s into an array\n", ap->nval) ); + if (freeable(ap)) + xfree(ap->sval); + ap->tval &= ~(STR|NUM|DONTFREE); + ap->tval |= ARR; + ap->sval = (char *) makesymtab(NSYMTAB); + } + if ((buf = (char *) malloc(bufsz)) == NULL) { + FATAL("out of memory in intest"); + } + buf[0] = 0; + for (p = a[0]; p; p = p->nnext) { + x = execute(p); /* expr */ + s = getsval(x); + if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest")) + FATAL("out of memory deleting %s[%s...]", x->nval, buf); + strlcat(buf, s, bufsz); + tempfree(x); + if (p->nnext) + strlcat(buf, *SUBSEP, bufsz); + } + k = lookup(buf, (Array *) ap->sval); + tempfree(ap); + free(buf); + if (k == NULL) + return(False); + else + return(True); +} + + +Cell *matchop(Node **a, int n) /* ~ and match() */ +{ + Cell *x, *y; + char *s, *t; + int i; + fa *pfa; + int (*mf)(fa *, const char *) = match, mode = 0; + + if (n == MATCHFCN) { + mf = pmatch; + mode = 1; + } + x = execute(a[1]); /* a[1] = target text */ + s = getsval(x); + if (a[0] == 0) /* a[1] == 0: already-compiled reg expr */ + i = (*mf)((fa *) a[2], s); + else { + y = execute(a[2]); /* a[2] = regular expr */ + t = getsval(y); + pfa = makedfa(t, mode); + i = (*mf)(pfa, s); + tempfree(y); + } + tempfree(x); + if (n == MATCHFCN) { + int start = patbeg - s + 1; + if (patlen < 0) + start = 0; + setfval(rstartloc, (Awkfloat) start); + setfval(rlengthloc, (Awkfloat) patlen); + x = gettemp(); + x->tval = NUM; + x->fval = start; + return x; + } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) + return(True); + else + return(False); +} + + +Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ +{ + Cell *x, *y; + int i; + + x = execute(a[0]); + i = istrue(x); + tempfree(x); + switch (n) { + case BOR: + if (i) return(True); + y = execute(a[1]); + i = istrue(y); + tempfree(y); + if (i) return(True); + else return(False); + case AND: + if ( !i ) return(False); + y = execute(a[1]); + i = istrue(y); + tempfree(y); + if (i) return(True); + else return(False); + case NOT: + if (i) return(False); + else return(True); + default: /* can't happen */ + FATAL("unknown boolean operator %d", n); + } + return 0; /*NOTREACHED*/ +} + +Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ +{ + int i; + Cell *x, *y; + Awkfloat j; + + x = execute(a[0]); + y = execute(a[1]); + if (x->tval&NUM && y->tval&NUM) { + j = x->fval - y->fval; + i = j<0? -1: (j>0? 1: 0); + } else { + i = strcmp(getsval(x), getsval(y)); + } + tempfree(x); + tempfree(y); + switch (n) { + case LT: if (i<0) return(True); + else return(False); + case LE: if (i<=0) return(True); + else return(False); + case NE: if (i!=0) return(True); + else return(False); + case EQ: if (i == 0) return(True); + else return(False); + case GE: if (i>=0) return(True); + else return(False); + case GT: if (i>0) return(True); + else return(False); + default: /* can't happen */ + FATAL("unknown relational operator %d", n); + } + return 0; /*NOTREACHED*/ +} + +void tfree(Cell *a) /* free a tempcell */ +{ + if (freeable(a)) { + dprintf( ("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval) ); + xfree(a->sval); + } + if (a == tmps) + FATAL("tempcell list is curdled"); + a->cnext = tmps; + tmps = a; +} + +Cell *gettemp(void) /* get a tempcell */ +{ int i; + Cell *x; + + if (!tmps) { + tmps = (Cell *) calloc(100, sizeof(Cell)); + if (!tmps) + FATAL("out of space for temporaries"); + for(i = 1; i < 100; i++) + tmps[i-1].cnext = &tmps[i]; + tmps[i-1].cnext = 0; + } + x = tmps; + tmps = x->cnext; + *x = tempcell; + return(x); +} + +Cell *indirect(Node **a, int n) /* $( a[0] ) */ +{ + Awkfloat val; + Cell *x; + int m; + char *s; + + x = execute(a[0]); + val = getfval(x); /* freebsd: defend against super large field numbers */ + if ((Awkfloat)INT_MAX < val) + FATAL("trying to access out of range field %s", x->nval); + m = (int) val; + if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */ + FATAL("illegal field $(%s), name \"%s\"", s, x->nval); + /* BUG: can x->nval ever be null??? */ + tempfree(x); + x = fieldadr(m); + x->ctype = OCELL; /* BUG? why are these needed? */ + x->csub = CFLD; + return(x); +} + +Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ +{ + int k, m, n; + char *s; + int temp; + Cell *x, *y, *z = 0; + + x = execute(a[0]); + y = execute(a[1]); + if (a[2] != 0) + z = execute(a[2]); + s = getsval(x); + k = strlen(s) + 1; + if (k <= 1) { + tempfree(x); + tempfree(y); + if (a[2] != 0) { + tempfree(z); + } + x = gettemp(); + setsval(x, ""); + return(x); + } + m = (int) getfval(y); + if (m <= 0) + m = 1; + else if (m > k) + m = k; + tempfree(y); + if (a[2] != 0) { + n = (int) getfval(z); + tempfree(z); + } else + n = k - 1; + if (n < 0) + n = 0; + else if (n > k - m) + n = k - m; + dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) ); + y = gettemp(); + temp = s[n+m-1]; /* with thanks to John Linderman */ + s[n+m-1] = '\0'; + setsval(y, s + m - 1); + s[n+m-1] = temp; + tempfree(x); + return(y); +} + +Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ +{ + Cell *x, *y, *z; + char *s1, *s2, *p1, *p2, *q; + Awkfloat v = 0.0; + + x = execute(a[0]); + s1 = getsval(x); + y = execute(a[1]); + s2 = getsval(y); + + z = gettemp(); + for (p1 = s1; *p1 != '\0'; p1++) { + for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++) + ; + if (*p2 == '\0') { + v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ + break; + } + } + tempfree(x); + tempfree(y); + setfval(z, v); + return(z); +} + +#define MAXNUMSIZE 50 + +int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ +{ + char *fmt; + char *p, *t; + const char *os; + Cell *x; + int flag = 0, n; + int fmtwd; /* format width */ + int fmtsz = recsize; + char *buf = *pbuf; + int bufsize = *pbufsize; + + os = s; + p = buf; + if ((fmt = (char *) malloc(fmtsz)) == NULL) + FATAL("out of memory in format()"); + while (*s) { + adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); + if (*s != '%') { + *p++ = *s++; + continue; + } + if (*(s+1) == '%') { + *p++ = '%'; + s += 2; + continue; + } + /* have to be real careful in case this is a huge number, eg, %100000d */ + fmtwd = atoi(s+1); + if (fmtwd < 0) + fmtwd = -fmtwd; + adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); + for (t = fmt; (*t++ = *s) != '\0'; s++) { + if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) + FATAL("format item %.30s... ran format() out of memory", os); + if (isalpha((uschar)*s) && *s != 'l' && *s != 'h' && *s != 'L') + break; /* the ansi panoply */ + if (*s == '*') { + if (a == NULL) + FATAL("not enough args in printf(%s)", os); + x = execute(a); + a = a->nnext; + snprintf(t-1, fmt + fmtsz - (t-1), "%d", fmtwd=(int) getfval(x)); + if (fmtwd < 0) + fmtwd = -fmtwd; + adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); + t = fmt + strlen(fmt); + tempfree(x); + } + } + *t = '\0'; + if (fmtwd < 0) + fmtwd = -fmtwd; + adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); + + switch (*s) { + case 'f': case 'e': case 'g': case 'E': case 'G': + flag = 'f'; + break; + case 'd': case 'i': + flag = 'd'; + if(*(s-1) == 'l') break; + *(t-1) = 'l'; + *t = 'd'; + *++t = '\0'; + break; + case 'o': case 'x': case 'X': case 'u': + flag = *(s-1) == 'l' ? 'd' : 'u'; + break; + case 's': + flag = 's'; + break; + case 'c': + flag = 'c'; + break; + default: + WARNING("weird printf conversion %s", fmt); + flag = '?'; + break; + } + if (a == NULL) + FATAL("not enough args in printf(%s)", os); + x = execute(a); + a = a->nnext; + n = MAXNUMSIZE; + if (fmtwd > n) + n = fmtwd; + adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); + switch (flag) { + case '?': /* unknown, so dump it too */ + snprintf(p, buf + bufsize - p, "%s", fmt); + t = getsval(x); + n = strlen(t); + if (fmtwd > n) + n = fmtwd; + adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); + p += strlen(p); + snprintf(p, buf + bufsize - p, "%s", t); + break; + case 'f': snprintf(p, buf + bufsize - p, fmt, getfval(x)); break; + case 'd': snprintf(p, buf + bufsize - p, fmt, (long) getfval(x)); break; + case 'u': snprintf(p, buf + bufsize - p, fmt, (int) getfval(x)); break; + case 's': + t = getsval(x); + n = strlen(t); + if (fmtwd > n) + n = fmtwd; + if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) + FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t); + snprintf(p, buf + bufsize - p, fmt, t); + break; + case 'c': + if (isnum(x)) { + if (getfval(x)) + snprintf(p, buf + bufsize - p, fmt, (int) getfval(x)); + else { + *p++ = '\0'; /* explicit null byte */ + *p = '\0'; /* next output will start here */ + } + } else + snprintf(p, buf + bufsize - p, fmt, getsval(x)[0]); + break; + default: + FATAL("can't happen: bad conversion %c in format()", flag); + } + tempfree(x); + p += strlen(p); + s++; + } + *p = '\0'; + free(fmt); + for ( ; a; a = a->nnext) /* evaluate any remaining args */ + execute(a); + *pbuf = buf; + *pbufsize = bufsize; + return p - buf; +} + +Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ +{ + Cell *x; + Node *y; + char *buf; + int bufsz=3*recsize; + + if ((buf = (char *) malloc(bufsz)) == NULL) + FATAL("out of memory in awksprintf"); + y = a[0]->nnext; + x = execute(a[0]); + if (format(&buf, &bufsz, getsval(x), y) == -1) + FATAL("sprintf string %.30s... too long. can't happen.", buf); + tempfree(x); + x = gettemp(); + x->sval = buf; + x->tval = STR; + return(x); +} + +Cell *awkprintf(Node **a, int n) /* printf */ +{ /* a[0] is list of args, starting with format string */ + /* a[1] is redirection operator, a[2] is redirection file */ + FILE *fp; + Cell *x; + Node *y; + char *buf; + int len; + int bufsz=3*recsize; + + if ((buf = (char *) malloc(bufsz)) == NULL) + FATAL("out of memory in awkprintf"); + y = a[0]->nnext; + x = execute(a[0]); + if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) + FATAL("printf string %.30s... too long. can't happen.", buf); + tempfree(x); + if (a[1] == NULL) { + /* fputs(buf, stdout); */ + fwrite(buf, len, 1, stdout); + if (ferror(stdout)) + FATAL("write error on stdout"); + } else { + fp = redirect(ptoi(a[1]), a[2]); + /* fputs(buf, fp); */ + fwrite(buf, len, 1, fp); + fflush(fp); + if (ferror(fp)) + FATAL("write error on %s", filename(fp)); + } + free(buf); + return(True); +} + +Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ +{ + Awkfloat i, j = 0; + double v; + Cell *x, *y, *z; + + x = execute(a[0]); + i = getfval(x); + tempfree(x); + if (n != UMINUS) { + y = execute(a[1]); + j = getfval(y); + tempfree(y); + } + z = gettemp(); + switch (n) { + case ADD: + i += j; + break; + case MINUS: + i -= j; + break; + case MULT: + i *= j; + break; + case DIVIDE: + if (j == 0) + FATAL("division by zero"); + i /= j; + break; + case MOD: + if (j == 0) + FATAL("division by zero in mod"); + modf(i/j, &v); + i = i - j * v; + break; + case UMINUS: + i = -i; + break; + case POWER: + if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ + i = ipow(i, (int) j); + else + i = errcheck(pow(i, j), "pow"); + break; + default: /* can't happen */ + FATAL("illegal arithmetic operator %d", n); + } + setfval(z, i); + return(z); +} + +double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ +{ + double v; + + if (n <= 0) + return 1; + v = ipow(x, n/2); + if (n % 2 == 0) + return v * v; + else + return x * v * v; +} + +Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ +{ + Cell *x, *z; + int k; + Awkfloat xf; + + x = execute(a[0]); + xf = getfval(x); + k = (n == PREINCR || n == POSTINCR) ? 1 : -1; + if (n == PREINCR || n == PREDECR) { + setfval(x, xf + k); + return(x); + } + z = gettemp(); + setfval(z, xf); + setfval(x, xf + k); + tempfree(x); + return(z); +} + +Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ +{ /* this is subtle; don't muck with it. */ + Cell *x, *y; + Awkfloat xf, yf; + double v; + + y = execute(a[1]); + x = execute(a[0]); + if (n == ASSIGN) { /* ordinary assignment */ + if (x == y && !(x->tval & (FLD|REC))) /* self-assignment: */ + ; /* leave alone unless it's a field */ + else if ((y->tval & (STR|NUM)) == (STR|NUM)) { + setsval(x, getsval(y)); + x->fval = getfval(y); + x->tval |= NUM; + } + else if (isstr(y)) + setsval(x, getsval(y)); + else if (isnum(y)) + setfval(x, getfval(y)); + else + funnyvar(y, "read value of"); + tempfree(y); + return(x); + } + xf = getfval(x); + yf = getfval(y); + switch (n) { + case ADDEQ: + xf += yf; + break; + case SUBEQ: + xf -= yf; + break; + case MULTEQ: + xf *= yf; + break; + case DIVEQ: + if (yf == 0) + FATAL("division by zero in /="); + xf /= yf; + break; + case MODEQ: + if (yf == 0) + FATAL("division by zero in %%="); + modf(xf/yf, &v); + xf = xf - yf * v; + break; + case POWEQ: + if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ + xf = ipow(xf, (int) yf); + else + xf = errcheck(pow(xf, yf), "pow"); + break; + default: + FATAL("illegal assignment operator %d", n); + break; + } + tempfree(y); + setfval(x, xf); + return(x); +} + +Cell *cat(Node **a, int q) /* a[0] cat a[1] */ +{ + Cell *x, *y, *z; + int n1, n2; + char *s; + size_t len; + + x = execute(a[0]); + y = execute(a[1]); + getsval(x); + getsval(y); + n1 = strlen(x->sval); + n2 = strlen(y->sval); + len = n1 + n2 + 1; + s = (char *) malloc(len); + if (s == NULL) + FATAL("out of space concatenating %.15s... and %.15s...", + x->sval, y->sval); + strlcpy(s, x->sval, len); + strlcpy(s+n1, y->sval, len - n1); + tempfree(x); + tempfree(y); + z = gettemp(); + z->sval = s; + z->tval = STR; + return(z); +} + +Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ +{ + Cell *x; + + if (a[0] == 0) + x = execute(a[1]); + else { + x = execute(a[0]); + if (istrue(x)) { + tempfree(x); + x = execute(a[1]); + } + } + return x; +} + +Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ +{ + Cell *x; + int pair; + + pair = ptoi(a[3]); + if (pairstack[pair] == 0) { + x = execute(a[0]); + if (istrue(x)) + pairstack[pair] = 1; + tempfree(x); + } + if (pairstack[pair] == 1) { + x = execute(a[1]); + if (istrue(x)) + pairstack[pair] = 0; + tempfree(x); + x = execute(a[2]); + return(x); + } + return(False); +} + +Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ +{ + Cell *x = 0, *y, *ap; + char *s; + int sep; + char *t, temp, num[50], *fs = 0; + int n, tempstat, arg3type; + + y = execute(a[0]); /* source string */ + s = getsval(y); + arg3type = ptoi(a[3]); + if (a[2] == 0) /* fs string */ + fs = *FS; + else if (arg3type == STRING) { /* split(str,arr,"string") */ + x = execute(a[2]); + fs = getsval(x); + } else if (arg3type == REGEXPR) + fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ + else + FATAL("illegal type of split"); + sep = *fs; + ap = execute(a[1]); /* array name */ + freesymtab(ap); + dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs) ); + ap->tval &= ~STR; + ap->tval |= ARR; + ap->sval = (char *) makesymtab(NSYMTAB); + + n = 0; + if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { + /* split(s, a, //); have to arrange that it looks like empty sep */ + arg3type = 0; + fs = ""; + sep = 0; + } + if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ + fa *pfa; + if (arg3type == REGEXPR) { /* it's ready already */ + pfa = (fa *) a[2]; + } else { + pfa = makedfa(fs, 1); + } + if (nematch(pfa,s)) { + tempstat = pfa->initstat; + pfa->initstat = 2; + do { + n++; + snprintf(num, sizeof num, "%d", n); + temp = *patbeg; + *patbeg = '\0'; + if (is_number(s)) + setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); + else + setsymtab(num, s, 0.0, STR, (Array *) ap->sval); + *patbeg = temp; + s = patbeg + patlen; + if (*(patbeg+patlen-1) == 0 || *s == 0) { + n++; + snprintf(num, sizeof num, "%d", n); + setsymtab(num, "", 0.0, STR, (Array *) ap->sval); + pfa->initstat = tempstat; + goto spdone; + } + } while (nematch(pfa,s)); + pfa->initstat = tempstat; /* bwk: has to be here to reset */ + /* cf gsub and refldbld */ + } + n++; + snprintf(num, sizeof num, "%d", n); + if (is_number(s)) + setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); + else + setsymtab(num, s, 0.0, STR, (Array *) ap->sval); + spdone: + pfa = NULL; + } else if (sep == ' ') { + for (n = 0; ; ) { + while (*s == ' ' || *s == '\t' || *s == '\n') + s++; + if (*s == 0) + break; + n++; + t = s; + do + s++; + while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0'); + temp = *s; + *s = '\0'; + snprintf(num, sizeof num, "%d", n); + if (is_number(t)) + setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); + else + setsymtab(num, t, 0.0, STR, (Array *) ap->sval); + *s = temp; + if (*s != 0) + s++; + } + } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ + for (n = 0; *s != 0; s++) { + char buf[2]; + n++; + snprintf(num, sizeof num, "%d", n); + buf[0] = *s; + buf[1] = 0; + if (isdigit((uschar)buf[0])) + setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); + else + setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); + } + } else if (*s != 0) { + for (;;) { + n++; + t = s; + while (*s != sep && *s != '\n' && *s != '\0') + s++; + temp = *s; + *s = '\0'; + snprintf(num, sizeof num, "%d", n); + if (is_number(t)) + setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); + else + setsymtab(num, t, 0.0, STR, (Array *) ap->sval); + *s = temp; + if (*s++ == 0) + break; + } + } + tempfree(ap); + tempfree(y); + if (a[2] != 0 && arg3type == STRING) { + tempfree(x); + } + x = gettemp(); + x->tval = NUM; + x->fval = n; + return(x); +} + +Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ +{ + Cell *x; + + x = execute(a[0]); + if (istrue(x)) { + tempfree(x); + x = execute(a[1]); + } else { + tempfree(x); + x = execute(a[2]); + } + return(x); +} + +Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ +{ + Cell *x; + + x = execute(a[0]); + if (istrue(x)) { + tempfree(x); + x = execute(a[1]); + } else if (a[2] != 0) { + tempfree(x); + x = execute(a[2]); + } + return(x); +} + +Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ +{ + Cell *x; + + for (;;) { + x = execute(a[0]); + if (!istrue(x)) + return(x); + tempfree(x); + x = execute(a[1]); + if (isbreak(x)) { + x = True; + return(x); + } + if (isnext(x) || isexit(x) || isret(x)) + return(x); + tempfree(x); + } +} + +Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ +{ + Cell *x; + + for (;;) { + x = execute(a[0]); + if (isbreak(x)) + return True; + if (isnext(x) || isexit(x) || isret(x)) + return(x); + tempfree(x); + x = execute(a[1]); + if (!istrue(x)) + return(x); + tempfree(x); + } +} + +Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ +{ + Cell *x; + + x = execute(a[0]); + tempfree(x); + for (;;) { + if (a[1]!=0) { + x = execute(a[1]); + if (!istrue(x)) return(x); + else tempfree(x); + } + x = execute(a[3]); + if (isbreak(x)) /* turn off break */ + return True; + if (isnext(x) || isexit(x) || isret(x)) + return(x); + tempfree(x); + x = execute(a[2]); + tempfree(x); + } +} + +Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ +{ + Cell *x, *vp, *arrayp, *cp, *ncp; + Array *tp; + int i; + + vp = execute(a[0]); + arrayp = execute(a[1]); + if (!isarr(arrayp)) { + return True; + } + tp = (Array *) arrayp->sval; + tempfree(arrayp); + for (i = 0; i < tp->size; i++) { /* this routine knows too much */ + for (cp = tp->tab[i]; cp != NULL; cp = ncp) { + setsval(vp, cp->nval); + ncp = cp->cnext; + x = execute(a[2]); + if (isbreak(x)) { + tempfree(vp); + return True; + } + if (isnext(x) || isexit(x) || isret(x)) { + tempfree(vp); + return(x); + } + tempfree(x); + } + } + return True; +} + +Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ +{ + Cell *x, *y; + Awkfloat u; + int t; + Awkfloat tmp; + char *p, *buf; + Node *nextarg; + FILE *fp; + + t = ptoi(a[0]); + x = execute(a[1]); + nextarg = a[1]->nnext; + switch (t) { + case FLENGTH: + if (isarr(x)) + u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ + else + u = strlen(getsval(x)); + break; + case FLOG: + u = errcheck(log(getfval(x)), "log"); break; + case FINT: + modf(getfval(x), &u); break; + case FEXP: + u = errcheck(exp(getfval(x)), "exp"); break; + case FSQRT: + u = errcheck(sqrt(getfval(x)), "sqrt"); break; + case FSIN: + u = sin(getfval(x)); break; + case FCOS: + u = cos(getfval(x)); break; + case FATAN: + if (nextarg == 0) { + WARNING("atan2 requires two arguments; returning 1.0"); + u = 1.0; + } else { + y = execute(a[1]->nnext); + u = atan2(getfval(x), getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + } + break; + case FCOMPL: + u = ~((int)getfval(x)); + break; + case FAND: + if (nextarg == 0) { + WARNING("and requires two arguments; returning 0"); + u = 0; + break; + } + y = execute(a[1]->nnext); + u = ((int)getfval(x)) & ((int)getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + break; + case FFOR: + if (nextarg == 0) { + WARNING("or requires two arguments; returning 0"); + u = 0; + break; + } + y = execute(a[1]->nnext); + u = ((int)getfval(x)) | ((int)getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + break; + case FXOR: + if (nextarg == 0) { + WARNING("or requires two arguments; returning 0"); + u = 0; + break; + } + y = execute(a[1]->nnext); + u = ((int)getfval(x)) ^ ((int)getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + break; + case FLSHIFT: + if (nextarg == 0) { + WARNING("or requires two arguments; returning 0"); + u = 0; + break; + } + y = execute(a[1]->nnext); + u = ((int)getfval(x)) << ((int)getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + break; + case FRSHIFT: + if (nextarg == 0) { + WARNING("or requires two arguments; returning 0"); + u = 0; + break; + } + y = execute(a[1]->nnext); + u = ((int)getfval(x)) >> ((int)getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + break; + case FSYSTEM: + fflush(stdout); /* in case something is buffered already */ + u = (Awkfloat) system(getsval(x)) / 256; /* 256 is unix-dep */ + break; + case FRAND: + if (use_srandom) + u = (Awkfloat) (random() % RAND_MAX) / RAND_MAX; + else + u = (Awkfloat)arc4random() / 0xffffffff; + break; + case FSRAND: + if (isrec(x)) /* no argument provided, want arc4random() */ + use_srandom = 0; + else { + use_srandom = 1; + u = getfval(x); + tmp = u; + srandom((unsigned int) u); + u = srand_seed; + srand_seed = tmp; + } + break; + case FTOUPPER: + case FTOLOWER: + buf = tostring(getsval(x)); + if (t == FTOUPPER) { + for (p = buf; *p; p++) + if (islower((uschar) *p)) + *p = toupper((uschar)*p); + } else { + for (p = buf; *p; p++) + if (isupper((uschar) *p)) + *p = tolower((uschar)*p); + } + tempfree(x); + x = gettemp(); + setsval(x, buf); + free(buf); + return x; + case FFLUSH: + if (isrec(x) || strlen(getsval(x)) == 0) { + flush_all(); /* fflush() or fflush("") -> all */ + u = 0; + } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL) + u = EOF; + else + u = fflush(fp); + break; + default: /* can't happen */ + FATAL("illegal function type %d", t); + break; + } + tempfree(x); + x = gettemp(); + setfval(x, u); + if (nextarg != 0) { + WARNING("warning: function has too many arguments"); + for ( ; nextarg; nextarg = nextarg->nnext) + execute(nextarg); + } + return(x); +} + +Cell *printstat(Node **a, int n) /* print a[0] */ +{ + Node *x; + Cell *y; + FILE *fp; + + if (a[1] == 0) /* a[1] is redirection operator, a[2] is file */ + fp = stdout; + else + fp = redirect(ptoi(a[1]), a[2]); + for (x = a[0]; x != NULL; x = x->nnext) { + y = execute(x); + fputs(getpssval(y), fp); + tempfree(y); + if (x->nnext == NULL) + fputs(*ORS, fp); + else + fputs(*OFS, fp); + } + if (a[1] != 0) + fflush(fp); + if (ferror(fp)) + FATAL("write error on %s", filename(fp)); + return(True); +} + +Cell *nullproc(Node **a, int n) +{ + n = n; + a = a; + return 0; +} + + +FILE *redirect(int a, Node *b) /* set up all i/o redirections */ +{ + FILE *fp; + Cell *x; + char *fname; + + x = execute(b); + fname = getsval(x); + fp = openfile(a, fname); + if (fp == NULL) + FATAL("can't open file %s", fname); + tempfree(x); + return fp; +} + +struct files { + FILE *fp; + const char *fname; + int mode; /* '|', 'a', 'w' => LE/LT, GT */ +} *files; + +int nfiles; + +void stdinit(void) /* in case stdin, etc., are not constants */ +{ + nfiles = FOPEN_MAX; + files = calloc(nfiles, sizeof(*files)); + if (files == NULL) + FATAL("can't allocate file memory for %u files", nfiles); + files[0].fp = stdin; + files[0].fname = "/dev/stdin"; + files[0].mode = LT; + files[1].fp = stdout; + files[1].fname = "/dev/stdout"; + files[1].mode = GT; + files[2].fp = stderr; + files[2].fname = "/dev/stderr"; + files[2].mode = GT; +} + +FILE *openfile(int a, const char *us) +{ + const char *s = us; + int i, m; + FILE *fp = 0; + + if (*s == '\0') + FATAL("null file name in print or getline"); + for (i=0; i < nfiles; i++) + if (files[i].fname && strcmp(s, files[i].fname) == 0) { + if (a == files[i].mode || (a==APPEND && files[i].mode==GT)) + return files[i].fp; + if (a == FFLUSH) + return files[i].fp; + } + if (a == FFLUSH) /* didn't find it, so don't create it! */ + return NULL; + + for (i=0; i < nfiles; i++) + if (files[i].fp == 0) + break; + if (i >= nfiles) { + struct files *nf; + int nnf = nfiles + FOPEN_MAX; + nf = reallocarray(files, nnf, sizeof(*nf)); + if (nf == NULL) + FATAL("cannot grow files for %s and %d files", s, nnf); + memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); + nfiles = nnf; + files = nf; + } + fflush(stdout); /* force a semblance of order */ + m = a; + if (a == GT) { + fp = fopen(s, "w"); + } else if (a == APPEND) { + fp = fopen(s, "a"); + m = GT; /* so can mix > and >> */ + } else if (a == '|') { /* output pipe */ + fp = popen(s, "w"); + } else if (a == LE) { /* input pipe */ + fp = popen(s, "r"); + } else if (a == LT) { /* getline <file */ + fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ + } else /* can't happen */ + FATAL("illegal redirection %d", a); + if (fp != NULL) { + files[i].fname = tostring(s); + files[i].fp = fp; + files[i].mode = m; + } + return fp; +} + +const char *filename(FILE *fp) +{ + int i; + + for (i = 0; i < nfiles; i++) + if (fp == files[i].fp) + return files[i].fname; + return "???"; +} + +Cell *closefile(Node **a, int n) +{ + Cell *x; + int i, stat; + + n = n; + x = execute(a[0]); + getsval(x); + stat = -1; + for (i = 0; i < nfiles; i++) { + if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) { + if (ferror(files[i].fp)) + WARNING( "i/o error occurred on %s", files[i].fname ); + if (files[i].mode == '|' || files[i].mode == LE) + stat = pclose(files[i].fp); + else + stat = fclose(files[i].fp); + if (stat == EOF) + WARNING( "i/o error occurred closing %s", files[i].fname ); + if (i > 2) /* don't do /dev/std... */ + xfree(files[i].fname); + files[i].fname = NULL; /* watch out for ref thru this */ + files[i].fp = NULL; + } + } + tempfree(x); + x = gettemp(); + setfval(x, (Awkfloat) stat); + return(x); +} + +void closeall(void) +{ + int i, stat; + + for (i = 0; i < FOPEN_MAX; i++) { + if (files[i].fp) { + if (ferror(files[i].fp)) + WARNING( "i/o error occurred on %s", files[i].fname ); + if (files[i].mode == '|' || files[i].mode == LE) + stat = pclose(files[i].fp); + else + stat = fclose(files[i].fp); + if (stat == EOF) + WARNING( "i/o error occurred while closing %s", files[i].fname ); + } + } +} + +void flush_all(void) +{ + int i; + + for (i = 0; i < nfiles; i++) + if (files[i].fp) + fflush(files[i].fp); +} + +void backsub(char **pb_ptr, char **sptr_ptr); + +Cell *sub(Node **a, int nnn) /* substitute command */ +{ + char *sptr, *pb, *q; + Cell *x, *y, *result; + char *t, *buf; + fa *pfa; + int bufsz = recsize; + + if ((buf = (char *) malloc(bufsz)) == NULL) + FATAL("out of memory in sub"); + x = execute(a[3]); /* target string */ + t = getsval(x); + if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ + pfa = (fa *) a[1]; /* regular expression */ + else { + y = execute(a[1]); + pfa = makedfa(getsval(y), 1); + tempfree(y); + } + y = execute(a[2]); /* replacement string */ + result = False; + if (pmatch(pfa, t)) { + sptr = t; + adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub"); + pb = buf; + while (sptr < patbeg) + *pb++ = *sptr++; + sptr = getsval(y); + while (*sptr != 0) { + adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); + if (*sptr == '\\') { + backsub(&pb, &sptr); + } else if (*sptr == '&') { + sptr++; + adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub"); + for (q = patbeg; q < patbeg+patlen; ) + *pb++ = *q++; + } else + *pb++ = *sptr++; + } + *pb = '\0'; + if (pb > buf + bufsz) + FATAL("sub result1 %.30s too big; can't happen", buf); + sptr = patbeg + patlen; + if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { + adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); + while ((*pb++ = *sptr++) != 0) + ; + } + if (pb > buf + bufsz) + FATAL("sub result2 %.30s too big; can't happen", buf); + setsval(x, buf); /* BUG: should be able to avoid copy */ + result = True; + } + tempfree(x); + tempfree(y); + free(buf); + return result; +} + +Cell *gsub(Node **a, int nnn) /* global substitute */ +{ + Cell *x, *y; + char *rptr, *sptr, *t, *pb, *q; + char *buf; + fa *pfa; + int mflag, tempstat, num; + int bufsz = recsize; + + if ((buf = (char *) malloc(bufsz)) == NULL) + FATAL("out of memory in gsub"); + mflag = 0; /* if mflag == 0, can replace empty string */ + num = 0; + x = execute(a[3]); /* target string */ + t = getsval(x); + if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ + pfa = (fa *) a[1]; /* regular expression */ + else { + y = execute(a[1]); + pfa = makedfa(getsval(y), 1); + tempfree(y); + } + y = execute(a[2]); /* replacement string */ + if (pmatch(pfa, t)) { + tempstat = pfa->initstat; + pfa->initstat = 2; + pb = buf; + rptr = getsval(y); + do { + if (patlen == 0 && *patbeg != 0) { /* matched empty string */ + if (mflag == 0) { /* can replace empty */ + num++; + sptr = rptr; + while (*sptr != 0) { + adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); + if (*sptr == '\\') { + backsub(&pb, &sptr); + } else if (*sptr == '&') { + sptr++; + adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); + for (q = patbeg; q < patbeg+patlen; ) + *pb++ = *q++; + } else + *pb++ = *sptr++; + } + } + if (*t == 0) /* at end */ + goto done; + adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); + *pb++ = *t++; + if (pb > buf + bufsz) /* BUG: not sure of this test */ + FATAL("gsub result0 %.30s too big; can't happen", buf); + mflag = 0; + } + else { /* matched nonempty string */ + num++; + sptr = t; + adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub"); + while (sptr < patbeg) + *pb++ = *sptr++; + sptr = rptr; + while (*sptr != 0) { + adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); + if (*sptr == '\\') { + backsub(&pb, &sptr); + } else if (*sptr == '&') { + sptr++; + adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); + for (q = patbeg; q < patbeg+patlen; ) + *pb++ = *q++; + } else + *pb++ = *sptr++; + } + t = patbeg + patlen; + if (patlen == 0 || *t == 0 || *(t-1) == 0) + goto done; + if (pb > buf + bufsz) + FATAL("gsub result1 %.30s too big; can't happen", buf); + mflag = 1; + } + } while (pmatch(pfa,t)); + sptr = t; + adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); + while ((*pb++ = *sptr++) != 0) + ; + done: if (pb < buf + bufsz) + *pb = '\0'; + else if (*(pb-1) != '\0') + FATAL("gsub result2 %.30s truncated; can't happen", buf); + setsval(x, buf); /* BUG: should be able to avoid copy + free */ + pfa->initstat = tempstat; + } + tempfree(x); + tempfree(y); + x = gettemp(); + x->tval = NUM; + x->fval = num; + free(buf); + return(x); +} + +void backsub(char **pb_ptr, char **sptr_ptr) /* handle \\& variations */ +{ /* sptr[0] == '\\' */ + char *pb = *pb_ptr, *sptr = *sptr_ptr; + + if (sptr[1] == '\\') { + if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ + *pb++ = '\\'; + *pb++ = '&'; + sptr += 4; + } else if (sptr[2] == '&') { /* \\& -> \ + matched */ + *pb++ = '\\'; + sptr += 2; + } else { /* \\x -> \\x */ + *pb++ = *sptr++; + *pb++ = *sptr++; + } + } else if (sptr[1] == '&') { /* literal & */ + sptr++; + *pb++ = *sptr++; + } else /* literal \ */ + *pb++ = *sptr++; + + *pb_ptr = pb; + *sptr_ptr = sptr; +} diff --git a/awk/strlcat.c b/awk/strlcat.c @@ -0,0 +1,52 @@ +/* + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <string.h> +#include <sys/types.h> + +#include "util.h" + +/* + * Appends src to string dst of size siz (unlike strncat, siz is the + * full size of dst, not space left). At most siz-1 characters + * will be copied. Always NUL terminates (unless siz <= strlen(dst)). + * Returns strlen(src) + MIN(siz, strlen(initial dst)). + * If retval >= siz, truncation occurred. + */ +size_t +strlcat(char *dst, const char *src, size_t siz) +{ + char *d = dst; + const char *s = src; + size_t n = siz; + size_t dlen; + /* Find the end of dst and adjust bytes left but don't go past end */ + while (n-- != 0 && *d != '\0') + d++; + dlen = d - dst; + n = siz - dlen; + if (n == 0) + return(dlen + strlen(s)); + while (*s != '\0') { + if (n != 1) { + *d++ = *s; + n--; + } + s++; + } + *d = '\0'; + return(dlen + (s - src)); /* count does not include NUL */ +} diff --git a/awk/strlcpy.c b/awk/strlcpy.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <string.h> +#include <sys/types.h> + +#include "util.h" + +/* + * Copy src to string dst of size siz. At most siz-1 characters + * will be copied. Always NUL terminates (unless siz == 0). + * Returns strlen(src); if retval >= siz, truncation occurred. + */ +size_t +strlcpy(char *dst, const char *src, size_t siz) +{ + char *d = dst; + const char *s = src; + size_t n = siz; + /* Copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') + break; + } + } + /* Not enough room in dst, add NUL and traverse rest of src */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) + ; + } + return(s - src - 1); /* count does not include NUL */ +} diff --git a/awk/tran.c b/awk/tran.c @@ -0,0 +1,458 @@ +/* $OpenBSD: tran.c,v 1.15 2011/09/28 19:27:18 millert Exp $ */ +/**************************************************************** +Copyright (C) Lucent Technologies 1997 +All Rights Reserved + +Permission to use, copy, modify, and distribute this software and +its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the name Lucent Technologies or any of +its entities not be used in advertising or publicity pertaining +to distribution of the software without specific, written prior +permission. + +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +****************************************************************/ + +#define DEBUG +#include <stdio.h> +#include <math.h> +#include <ctype.h> +#include <string.h> +#include <stdlib.h> +#include "awk.h" +#include "ytab.h" + +#define FULLTAB 2 /* rehash when table gets this x full */ +#define GROWTAB 4 /* grow table by this factor */ + +Array *symtab; /* main symbol table */ + +char **FS; /* initial field sep */ +char **RS; /* initial record sep */ +char **OFS; /* output field sep */ +char **ORS; /* output record sep */ +char **OFMT; /* output format for numbers */ +char **CONVFMT; /* format for conversions in getsval */ +Awkfloat *NF; /* number of fields in current record */ +Awkfloat *NR; /* number of current record */ +Awkfloat *FNR; /* number of current record in current file */ +char **FILENAME; /* current filename argument */ +Awkfloat *ARGC; /* number of arguments from command line */ +char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */ +Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */ +Awkfloat *RLENGTH; /* length of same */ + +Cell *fsloc; /* FS */ +Cell *nrloc; /* NR */ +Cell *nfloc; /* NF */ +Cell *fnrloc; /* FNR */ +Array *ARGVtab; /* symbol table containing ARGV[...] */ +Array *ENVtab; /* symbol table containing ENVIRON[...] */ +Cell *rstartloc; /* RSTART */ +Cell *rlengthloc; /* RLENGTH */ +Cell *symtabloc; /* SYMTAB */ + +Cell *nullloc; /* a guaranteed empty cell */ +Node *nullnode; /* zero&null, converted into a node for comparisons */ +Cell *literal0; + +extern Cell **fldtab; + +void syminit(void) /* initialize symbol table with builtin vars */ +{ + literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab); + /* this is used for if(x)... tests: */ + nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab); + nullnode = celltonode(nullloc, CCON); + + fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab); + FS = &fsloc->sval; + RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval; + OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval; + ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval; + OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; + CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; + FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval; + nfloc = setsymtab("NF", "", 0.0, NUM, symtab); + NF = &nfloc->fval; + nrloc = setsymtab("NR", "", 0.0, NUM, symtab); + NR = &nrloc->fval; + fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab); + FNR = &fnrloc->fval; + SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval; + rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab); + RSTART = &rstartloc->fval; + rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab); + RLENGTH = &rlengthloc->fval; + symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab); + symtabloc->sval = (char *) symtab; +} + +void arginit(int ac, char **av) /* set up ARGV and ARGC */ +{ + Cell *cp; + int i; + char temp[50]; + + ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval; + cp = setsymtab("ARGV", "", 0.0, ARR, symtab); + ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */ + cp->sval = (char *) ARGVtab; + for (i = 0; i < ac; i++) { + snprintf(temp, sizeof temp, "%d", i); + if (is_number(*av)) + setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab); + else + setsymtab(temp, *av, 0.0, STR, ARGVtab); + av++; + } +} + +void envinit(char **envp) /* set up ENVIRON variable */ +{ + Cell *cp; + char *p; + + cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab); + ENVtab = makesymtab(NSYMTAB); + cp->sval = (char *) ENVtab; + for ( ; *envp; envp++) { + if ((p = strchr(*envp, '=')) == NULL) + continue; + if( p == *envp ) /* no left hand side name in env string */ + continue; + *p++ = 0; /* split into two strings at = */ + if (is_number(p)) + setsymtab(*envp, p, atof(p), STR|NUM, ENVtab); + else + setsymtab(*envp, p, 0.0, STR, ENVtab); + p[-1] = '='; /* restore in case env is passed down to a shell */ + } +} + +Array *makesymtab(int n) /* make a new symbol table */ +{ + Array *ap; + Cell **tp; + + ap = (Array *) malloc(sizeof(Array)); + tp = (Cell **) calloc(n, sizeof(Cell *)); + if (ap == NULL || tp == NULL) + FATAL("out of space in makesymtab"); + ap->nelem = 0; + ap->size = n; + ap->tab = tp; + return(ap); +} + +void freesymtab(Cell *ap) /* free a symbol table */ +{ + Cell *cp, *temp; + Array *tp; + int i; + + if (!isarr(ap)) + return; + tp = (Array *) ap->sval; + if (tp == NULL) + return; + for (i = 0; i < tp->size; i++) { + for (cp = tp->tab[i]; cp != NULL; cp = temp) { + xfree(cp->nval); + if (freeable(cp)) + xfree(cp->sval); + temp = cp->cnext; /* avoids freeing then using */ + free(cp); + tp->nelem--; + } + tp->tab[i] = 0; + } + if (tp->nelem != 0) + WARNING("can't happen: inconsistent element count freeing %s", ap->nval); + free(tp->tab); + free(tp); +} + +void freeelem(Cell *ap, const char *s) /* free elem s from ap (i.e., ap["s"] */ +{ + Array *tp; + Cell *p, *prev = NULL; + int h; + + tp = (Array *) ap->sval; + h = hash(s, tp->size); + for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext) + if (strcmp(s, p->nval) == 0) { + if (prev == NULL) /* 1st one */ + tp->tab[h] = p->cnext; + else /* middle somewhere */ + prev->cnext = p->cnext; + if (freeable(p)) + xfree(p->sval); + free(p->nval); + free(p); + tp->nelem--; + return; + } +} + +Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp) +{ + int h; + Cell *p; + + if (n != NULL && (p = lookup(n, tp)) != NULL) { + dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n", + (void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval) ); + return(p); + } + p = (Cell *) malloc(sizeof(Cell)); + if (p == NULL) + FATAL("out of space for symbol table at %s", n); + p->nval = tostring(n); + p->sval = s ? tostring(s) : tostring(""); + p->fval = f; + p->tval = t; + p->csub = CUNK; + p->ctype = OCELL; + tp->nelem++; + if (tp->nelem > FULLTAB * tp->size) + rehash(tp); + h = hash(n, tp->size); + p->cnext = tp->tab[h]; + tp->tab[h] = p; + dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n", + (void*)p, p->nval, p->sval, p->fval, p->tval) ); + return(p); +} + +int hash(const char *s, int n) /* form hash value for string s */ +{ + unsigned hashval; + + for (hashval = 0; *s != '\0'; s++) + hashval = (*s + 31 * hashval); + return hashval % n; +} + +void rehash(Array *tp) /* rehash items in small table into big one */ +{ + int i, nh, nsz; + Cell *cp, *op, **np; + + nsz = GROWTAB * tp->size; + np = (Cell **) calloc(nsz, sizeof(Cell *)); + if (np == NULL) /* can't do it, but can keep running. */ + return; /* someone else will run out later. */ + for (i = 0; i < tp->size; i++) { + for (cp = tp->tab[i]; cp; cp = op) { + op = cp->cnext; + nh = hash(cp->nval, nsz); + cp->cnext = np[nh]; + np[nh] = cp; + } + } + free(tp->tab); + tp->tab = np; + tp->size = nsz; +} + +Cell *lookup(const char *s, Array *tp) /* look for s in tp */ +{ + Cell *p; + int h; + + h = hash(s, tp->size); + for (p = tp->tab[h]; p != NULL; p = p->cnext) + if (strcmp(s, p->nval) == 0) + return(p); /* found it */ + return(NULL); /* not found */ +} + +Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ +{ + int fldno; + + if ((vp->tval & (NUM | STR)) == 0) + funnyvar(vp, "assign to"); + if (isfld(vp)) { + donerec = 0; /* mark $0 invalid */ + fldno = atoi(vp->nval); + if (fldno > *NF) + newfld(fldno); + dprintf( ("setting field %d to %g\n", fldno, f) ); + } else if (isrec(vp)) { + donefld = 0; /* mark $1... invalid */ + donerec = 1; + } + if (freeable(vp)) + xfree(vp->sval); /* free any previous string */ + vp->tval &= ~STR; /* mark string invalid */ + vp->tval |= NUM; /* mark number ok */ + dprintf( ("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval) ); + return vp->fval = f; +} + +void funnyvar(Cell *vp, const char *rw) +{ + if (isarr(vp)) + FATAL("can't %s %s; it's an array name.", rw, vp->nval); + if (vp->tval & FCN) + FATAL("can't %s %s; it's a function.", rw, vp->nval); + WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o", + vp, vp->nval, vp->sval, vp->fval, vp->tval); +} + +char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ +{ + char *t; + int fldno; + + dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n", + (void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) ); + if ((vp->tval & (NUM | STR)) == 0) + funnyvar(vp, "assign to"); + if (isfld(vp)) { + donerec = 0; /* mark $0 invalid */ + fldno = atoi(vp->nval); + if (fldno > *NF) + newfld(fldno); + dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) ); + } else if (isrec(vp)) { + donefld = 0; /* mark $1... invalid */ + donerec = 1; + } + t = tostring(s); /* in case it's self-assign */ + if (freeable(vp)) + xfree(vp->sval); + vp->tval &= ~NUM; + vp->tval |= STR; + vp->tval &= ~DONTFREE; + dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n", + (void*)vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) ); + return(vp->sval = t); +} + +Awkfloat getfval(Cell *vp) /* get float val of a Cell */ +{ + if ((vp->tval & (NUM | STR)) == 0) + funnyvar(vp, "read value of"); + if (isfld(vp) && donefld == 0) + fldbld(); + else if (isrec(vp) && donerec == 0) + recbld(); + if (!isnum(vp)) { /* not a number */ + vp->fval = atof(vp->sval); /* best guess */ + if (is_number(vp->sval) && !(vp->tval&CON)) + vp->tval |= NUM; /* make NUM only sparingly */ + } + dprintf( ("getfval %p: %s = %g, t=%o\n", + (void*)vp, NN(vp->nval), vp->fval, vp->tval) ); + return(vp->fval); +} + +static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */ +{ + int n; + double dtemp; + + if ((vp->tval & (NUM | STR)) == 0) + funnyvar(vp, "read value of"); + if (isfld(vp) && donefld == 0) + fldbld(); + else if (isrec(vp) && donerec == 0) + recbld(); + if (isstr(vp) == 0) { + if (freeable(vp)) + xfree(vp->sval); + if (modf(vp->fval, &dtemp) == 0) /* it's integral */ + n = asprintf(&vp->sval, "%.30g", vp->fval); + else + n = asprintf(&vp->sval, *fmt, vp->fval); + if (n == -1) + FATAL("out of space in get_str_val"); + vp->tval &= ~DONTFREE; + vp->tval |= STR; + } + dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", + (void*)vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) ); + return(vp->sval); +} + +char *getsval(Cell *vp) /* get string val of a Cell */ +{ + return get_str_val(vp, CONVFMT); +} + +char *getpssval(Cell *vp) /* get string val of a Cell for print */ +{ + return get_str_val(vp, OFMT); +} + + +char *tostring(const char *s) /* make a copy of string s */ +{ + char *p; + + p = strdup(s); + if (p == NULL) + FATAL("out of space in tostring on %s", s); + return p; +} + +char *qstring(const char *is, int delim) /* collect string up to next delim */ +{ + const char *os = is; + int c, n; + uschar *s = (uschar *) is; + uschar *buf, *bp; + + if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL) + FATAL( "out of space in qstring(%s)", s); + for (bp = buf; (c = *s) != delim; s++) { + if (c == '\n') + SYNTAX( "newline in string %.20s...", os ); + else if (c != '\\') + *bp++ = c; + else { /* \something */ + c = *++s; + if (c == 0) { /* \ at end */ + *bp++ = '\\'; + break; /* for loop */ + } + switch (c) { + case '\\': *bp++ = '\\'; break; + case 'n': *bp++ = '\n'; break; + case 't': *bp++ = '\t'; break; + case 'b': *bp++ = '\b'; break; + case 'f': *bp++ = '\f'; break; + case 'r': *bp++ = '\r'; break; + default: + if (!isdigit(c)) { + *bp++ = c; + break; + } + n = c - '0'; + if (isdigit(s[1])) { + n = 8 * n + *++s - '0'; + if (isdigit(s[1])) + n = 8 * n + *++s - '0'; + } + *bp++ = n; + break; + } + } + } + *bp++ = 0; + return (char *) buf; +} diff --git a/awk/util.h b/awk/util.h @@ -0,0 +1,8 @@ +#include <sys/stat.h> + +#include <stdint.h> + +uint32_t arc4random(void); +void *reallocarray(void *, size_t, size_t); +size_t strlcat(char *, const char *, size_t); +size_t strlcpy(char *, const char *, size_t); diff --git a/sed/Makefile b/sed/Makefile @@ -0,0 +1,10 @@ +OBJ = compile.o fgetln.o main.o misc.o process.o reallocarray.o strlcat.o strlcpy.o +BIN = sed + +all: $(BIN) + +$(BIN): $(OBJ) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ) + +clean: + rm -f $(BIN) $(OBJ) diff --git a/sed/POSIX b/sed/POSIX @@ -0,0 +1,199 @@ +# $OpenBSD: POSIX,v 1.2 1996/06/26 05:39:04 deraadt Exp $ +# from: @(#)POSIX 8.1 (Berkeley) 6/6/93 + +Comments on the IEEE P1003.2 Draft 12 + Part 2: Shell and Utilities + Section 4.55: sed - Stream editor + +Diomidis Spinellis <dds@doc.ic.ac.uk> +Keith Bostic <bostic@cs.berkeley.edu> + +In the following paragraphs, "wrong" usually means "inconsistent with +historic practice", as most of the following comments refer to +undocumented inconsistencies between the historical versions of sed and +the POSIX 1003.2 standard. All the comments are notes taken while +implementing a POSIX-compatible version of sed, and should not be +interpreted as official opinions or criticism towards the POSIX committee. +All uses of "POSIX" refer to section 4.55, Draft 12 of POSIX 1003.2. + + 1. 32V and BSD derived implementations of sed strip the text + arguments of the a, c and i commands of their initial blanks, + i.e. + + #!/bin/sed -f + a\ + foo\ + \ indent\ + bar + + produces: + + foo + indent + bar + + POSIX does not specify this behavior as the System V versions of + sed do not do this stripping. The argument against stripping is + that it is difficult to write sed scripts that have leading blanks + if they are stripped. The argument for stripping is that it is + difficult to write readable sed scripts unless indentation is allowed + and ignored, and leading whitespace is obtainable by entering a + backslash in front of it. This implementation follows the BSD + historic practice. + + 2. Historical versions of sed required that the w flag be the last + flag to an s command as it takes an additional argument. This + is obvious, but not specified in POSIX. + + 3. Historical versions of sed required that whitespace follow a w + flag to an s command. This is not specified in POSIX. This + implementation permits whitespace but does not require it. + + 4. Historical versions of sed permitted any number of whitespace + characters to follow the w command. This is not specified in + POSIX. This implementation permits whitespace but does not + require it. + + 5. The rule for the l command differs from historic practice. Table + 2-15 includes the various ANSI C escape sequences, including \\ + for backslash. Some historical versions of sed displayed two + digit octal numbers, too, not three as specified by POSIX. POSIX + is a cleanup, and is followed by this implementation. + + 6. The POSIX specification for ! does not specify that for a single + command the command must not contain an address specification + whereas the command list can contain address specifications. The + specification for ! implies that "3!/hello/p" works, and it never + has, historically. Note, + + 3!{ + /hello/p + } + + does work. + + 7. POSIX does not specify what happens with consecutive ! commands + (e.g. /foo/!!!p). Historic implementations allow any number of + !'s without changing the behaviour. (It seems logical that each + one might reverse the behaviour.) This implementation follows + historic practice. + + 8. Historic versions of sed permitted commands to be separated + by semi-colons, e.g. 'sed -ne '1p;2p;3q' printed the first + three lines of a file. This is not specified by POSIX. + Note, the ; command separator is not allowed for the commands + a, c, i, w, r, :, b, t, # and at the end of a w flag in the s + command. This implementation follows historic practice and + implements the ; separator. + + 9. Historic versions of sed terminated the script if EOF was reached + during the execution of the 'n' command, i.e.: + + sed -e ' + n + i\ + hello + ' </dev/null + + did not produce any output. POSIX does not specify this behavior. + This implementation follows historic practice. + +10. Deleted. + +11. Historical implementations do not output the change text of a c + command in the case of an address range whose first line number + is greater than the second (e.g. 3,1). POSIX requires that the + text be output. Since the historic behavior doesn't seem to have + any particular purpose, this implementation follows the POSIX + behavior. + +12. POSIX does not specify whether address ranges are checked and + reset if a command is not executed due to a jump. The following + program will behave in different ways depending on whether the + 'c' command is triggered at the third line, i.e. will the text + be output even though line 3 of the input will never logically + encounter that command. + + 2,4b + 1,3c\ + text + + Historic implementations, and this implementation, do not output + the text in the above example. The general rule, therefore, + is that a range whose second address is never matched extends to + the end of the input. + +13. Historical implementations allow an output suppressing #n at the + beginning of -e arguments as well as in a script file. POSIX + does not specify this. This implementation follows historical + practice. + +14. POSIX does not explicitly specify how sed behaves if no script is + specified. Since the sed Synopsis permits this form of the command, + and the language in the Description section states that the input + is output, it seems reasonable that it behave like the cat(1) + command. Historic sed implementations behave differently for "ls | + sed", where they produce no output, and "ls | sed -e#", where they + behave like cat. This implementation behaves like cat in both cases. + +15. The POSIX requirement to open all w files at the beginning makes + sed behave nonintuitively when the w commands are preceded by + addresses or are within conditional blocks. This implementation + follows historic practice and POSIX, by default, and provides the + -a option which opens the files only when they are needed. + +16. POSIX does not specify how escape sequences other than \n and \D + (where D is the delimiter character) are to be treated. This is + reasonable, however, it also doesn't state that the backslash is + to be discarded from the output regardless. A strict reading of + POSIX would be that "echo xyz | sed s/./\a" would display "\ayz". + As historic sed implementations always discarded the backslash, + this implementation does as well. + +17. POSIX specifies that an address can be "empty". This implies + that constructs like ",d" or "1,d" and ",5d" are allowed. This + is not true for historic implementations or this implementation + of sed. + +18. The b t and : commands are documented in POSIX to ignore leading + white space, but no mention is made of trailing white space. + Historic implementations of sed assigned different locations to + the labels "x" and "x ". This is not useful, and leads to subtle + programming errors, but it is historic practice and changing it + could theoretically break working scripts. This implementation + follows historic practice. + +19. Although POSIX specifies that reading from files that do not exist + from within the script must not terminate the script, it does not + specify what happens if a write command fails. Historic practice + is to fail immediately if the file cannot be opened or written. + This implementation follows historic practice. + +20. Historic practice is that the \n construct can be used for either + string1 or string2 of the y command. This is not specified by + POSIX. This implementation follows historic practice. + +21. Deleted. + +22. Historic implementations of sed ignore the RE delimiter characters + within character classes. This is not specified in POSIX. This + implementation follows historic practice. + +23. Historic implementations handle empty RE's in a special way: the + empty RE is interpreted as if it were the last RE encountered, + whether in an address or elsewhere. POSIX does not document this + behavior. For example the command: + + sed -e /abc/s//XXX/ + + substitutes XXX for the pattern abc. The semantics of "the last + RE" can be defined in two different ways: + + 1. The last RE encountered when compiling (lexical/static scope). + 2. The last RE encountered while running (dynamic scope). + + While many historical implementations fail on programs depending + on scope differences, the SunOS version exhibited dynamic scope + behaviour. This implementation does dynamic scoping, as this seems + the most useful and in order to remain consistent with historical + practice. diff --git a/sed/TEST/hanoi.sed b/sed/TEST/hanoi.sed @@ -0,0 +1,103 @@ +# $OpenBSD: hanoi.sed,v 1.2 1996/06/26 05:39:09 deraadt Exp $ +# Towers of Hanoi in sed. +# +# from: @(#)hanoi.sed 8.1 (Berkeley) 6/6/93 +# +# +# Ex: +# Run "sed -f hanoi.sed", and enter: +# +# :abcd: : :<CR><CR> +# +# note -- TWO carriage returns, a peculiarity of sed), this will output the +# sequence of states involved in moving 4 rings, the largest called "a" and +# the smallest called "d", from the first to the second of three towers, so +# that the rings on any tower at any time are in descending order of size. +# You can start with a different arrangement and a different number of rings, +# say :ce:b:ax: and it will give the shortest procedure for moving them all +# to the middle tower. The rules are: the names of the rings must all be +# lower-case letters, they must be input within 3 fields (representing the +# towers) and delimited by 4 colons, such that the letters within each field +# are in alphabetical order (i.e. rings are in descending order of size). +# +# For the benefit of anyone who wants to figure out the script, an "internal" +# line of the form +# b:0abx:1a2b3 :2 :3x2 +# has the following meaning: the material after the three markers :1, :2, +# and :3 represents the three towers; in this case the current set-up is +# ":ab : :x :". The numbers after a, b and x in these fields indicate +# that the next time it gets a chance, it will move a to tower 2, move b +# to tower 3, and move x to tower 2. The string after :0 just keeps track +# of the alphabetical order of the names of the rings. The b at the +# beginning means that it is now dealing with ring b (either about to move +# it, or re-evaluating where it should next be moved to). +# +# Although this version is "limited" to 26 rings because of the size of the +# alphabet, one could write a script using the same idea in which the rings +# were represented by arbitrary [strings][within][brackets], and in place of +# the built-in line of the script giving the order of the letters of the +# alphabet, it would accept from the user a line giving the ordering to be +# assumed, e.g. [ucbvax][decvax][hplabs][foo][bar]. +# +# George Bergman +# Math, UC Berkeley 94720 USA + +# cleaning, diagnostics +s/ *//g +/^$/d +/[^a-z:]/{a\ +Illegal characters: use only a-z and ":". Try again. +d +} +/^:[a-z]*:[a-z]*:[a-z]*:$/!{a\ +Incorrect format: use\ +\ : string1 : string2 : string3 :<CR><CR>\ +Try again. +d +} +/\([a-z]\).*\1/{a\ +Repeated letters not allowed. Try again. +d +} +# initial formatting +h +s/[a-z]/ /g +G +s/^:\( *\):\( *\):\( *\):\n:\([a-z]*\):\([a-z]*\):\([a-z]*\):$/:1\4\2\3:2\5\1\3:3\6\1\2:0/ +s/[a-z]/&2/g +s/^/abcdefghijklmnopqrstuvwxyz/ +:a +s/^\(.\).*\1.*/&\1/ +s/.// +/^[^:]/ba +s/\([^0]*\)\(:0.*\)/\2\1:/ +s/^[^0]*0\(.\)/\1&/ +:b +# outputting current state without markers +h +s/.*:1/:/ +s/[123]//gp +g +:c +# establishing destinations +/^\(.\).*\1:1/td +/^\(.\).*:1[^:]*\11/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\31/ +/^\(.\).*:1[^:]*\12/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\33/ +/^\(.\).*:1[^:]*\13/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\32/ +/^\(.\).*:2[^:]*\11/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\33/ +/^\(.\).*:2[^:]*\12/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\32/ +/^\(.\).*:2[^:]*\13/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\31/ +/^\(.\).*:3[^:]*\11/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\32/ +/^\(.\).*:3[^:]*\12/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\31/ +/^\(.\).*:3[^:]*\13/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\33/ +bc +# iterate back to find smallest out-of-place ring +:d +s/^\(.\)\(:0[^:]*\([^:]\)\1.*:\([123]\)[^:]*\1\)\4/\3\2\4/ +td +# move said ring (right, resp. left) +s/^\(.\)\(.*\)\1\([23]\)\(.*:\3[^ ]*\) /\1\2 \4\1\3/ +s/^\(.\)\(.*:\([12]\)[^ ]*\) \(.*\)\1\3/\1\2\1\3\4 / +tb +s/.*/Done! Try another, or end with ^D./p +d diff --git a/sed/TEST/math.sed b/sed/TEST/math.sed @@ -0,0 +1,164 @@ +# $OpenBSD: math.sed,v 1.2 1996/06/26 05:39:10 deraadt Exp $ +# +# from: @(#)math.sed 8.1 (Berkeley) 6/6/93 +# +# Addition and multiplication in sed. +# ++ for a limited time only do (expr) too!!! +# +# Kevin S Braunsdorf, PUCC UNIX Group, ksb@cc.purdue.edu. +# +# Ex: +# echo "4+7*3" | sed -f %f + +# make sure the expression is well formed +s/[ ]//g +/[+*\/-]$/{ + a\ + poorly formed expression, operator on the end + q +} +/^[+*\/]/{ + a\ + poorly formed expression, leading operator + q +} + +# fill hold space with done token +x +s/^.*/done/ +x + +# main loop, process operators (*, + and () ) +: loop +/^\+/{ + s/// + b loop +} +/^\(.*\)(\([^)]*\))\(.*\)$/{ + H + s//\2/ + x + s/^\(.*\)\n\(.*\)(\([^()]*\))\(.*\)$/()\2@\4@\1/ + x + b loop +} +/^[0-9]*\*/b mul +/^\([0-9]*\)\+\([0-9+*]*\*[0-9]*\)$/{ + s//\2+\1/ + b loop +} +/^[0-9]*\+/{ + s/$/=/ + b add +} +x +/^done$/{ + x + p + d +} +/^()/{ + s/// + x + G + s/\(.*\)\n\([^@]*\)@\([^@]*\)@\(.*\)/\2\1\3/ + x + s/[^@]*@[^@]*@\(.*\)/\1/ + x + b loop +} +i\ +help, stack problem +p +x +p +q + +# turn mul into add until 1*x -> x +: mul +/^0*1\*/{ + s/// + b loop +} +/^\([0-9]*\)0\*/{ + s/^\([0-9]*\)0\*\([0-9]*\)/\1*\20/ + b mul +} +s/^\([0-9]*\)1\*/\10*/ +s/^\([0-9]*\)2\*/\11*/ +s/^\([0-9]*\)3\*/\12*/ +s/^\([0-9]*\)4\*/\13*/ +s/^\([0-9]*\)5\*/\14*/ +s/^\([0-9]*\)6\*/\15*/ +s/^\([0-9]*\)7\*/\16*/ +s/^\([0-9]*\)8\*/\17*/ +s/^\([0-9]*\)9\*/\18*/ +s/\*\([0-9*]*\)/*\1+\1/ +b mul + +# get rid of a plus term until 0+x -> x +: add +/^\+\([0-9+*]*\)=/{ + s//\1/ + b loop +} +/^\([0-9*]*\)\+=/{ + s//\1/ + b loop +} +/^\([0-9]*\)\+\([0-9*+]*\)\+=/{ + s//\2+\1/ + b loop +} +/^\([0-9]*\)0\+\([0-9]*\)\([0-9]\)=/{ + s//\1+\2=\3/ + b add +} +/^\([0-9]*\)\([0-9]\)\+\([0-9]*\)0=/{ + s//\1+\3=\2/ + b add +} +/^\([0-9]*\)0\+\([0-9*+]*\)\+\([0-9]*\)\([0-9]\)=/{ + s//\1+\2+\3=\4/ + b add +} +/^\([0-9]*\)\([0-9]\)\+\([0-9*+]*\)\+\([0-9]*\)0=/{ + s//\1+\3+\4=\2/ + b add +} +s/^\([0-9]*\)1\+/\10+/ +s/^\([0-9]*\)2\+/\11+/ +s/^\([0-9]*\)3\+/\12+/ +s/^\([0-9]*\)4\+/\13+/ +s/^\([0-9]*\)5\+/\14+/ +s/^\([0-9]*\)6\+/\15+/ +s/^\([0-9]*\)7\+/\16+/ +s/^\([0-9]*\)8\+/\17+/ +s/^\([0-9]*\)9\+/\18+/ + +s/9=\([0-9]*\)$/_=\1/ +s/8=\([0-9]*\)$/9=\1/ +s/7=\([0-9]*\)$/8=\1/ +s/6=\([0-9]*\)$/7=\1/ +s/5=\([0-9]*\)$/6=\1/ +s/4=\([0-9]*\)$/5=\1/ +s/3=\([0-9]*\)$/4=\1/ +s/2=\([0-9]*\)$/3=\1/ +s/1=\([0-9]*\)$/2=\1/ +/_/{ + s//_0/ + : inc + s/9_/_0/ + s/8_/9/ + s/7_/8/ + s/6_/7/ + s/5_/6/ + s/4_/5/ + s/3_/4/ + s/2_/3/ + s/1_/2/ + s/0_/1/ + s/\+_/+1/ + /_/b inc +} +b add diff --git a/sed/TEST/sed.test b/sed/TEST/sed.test @@ -0,0 +1,549 @@ +#!/bin/sh - +# $OpenBSD: sed.test,v 1.4 2008/10/07 15:02:45 millert Exp $ +# +# Copyright (c) 1992 Diomidis Spinellis. +# Copyright (c) 1992, 1993 +# The Regents of the University of California. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# from: @(#)sed.test 8.1 (Berkeley) 6/6/93 +# + +# sed Regression Tests +# +# The following files are created: +# lines[1-4], script1, script2 +# Two directories *.out contain the test results + +main() +{ + BASE=/usr/bin/sed + BASELOG=sed.out + TEST=./sed + TESTLOG=nsed.out + DICT=/usr/share/dict/words + + test_error | more + + awk 'END { for (i = 1; i < 15; i++) print "l1_" i}' </dev/null >lines1 + awk 'END { for (i = 1; i < 10; i++) print "l2_" i}' </dev/null >lines2 + + exec 4>&1 5>&2 + + # Set these flags to get messages about known problems + BSD=0 + GNU=0 + SUN=0 + tests $BASE $BASELOG + + BSD=0 + GNU=0 + SUN=0 + tests $TEST $TESTLOG + exec 1>&4 2>&5 + diff $BASELOG $TESTLOG | less +} + +tests() +{ + SED=$1 + DIR=$2 + rm -rf $DIR + mkdir $DIR + MARK=100 + + test_args + test_addr + echo Testing commands + test_group + test_acid + test_branch + test_pattern + test_print + test_subst +} + +mark() +{ + MARK=`expr $MARK + 1` + exec 1>&4 2>&5 + exec >"$DIR/${MARK}_$1" + echo "Test $1:$MARK" + # Uncomment this line to match tests with sed error messages + echo "Test $1:$MARK" >&5 +} + +test_args() +{ + mark '1.1' + echo Testing argument parsing + echo First type + if [ $SUN -eq 1 ] ; then + echo SunOS sed prints only with -n + else + $SED 's/^/e1_/p' lines1 + fi + mark '1.2' ; $SED -n 's/^/e1_/p' lines1 + mark '1.3' + if [ $SUN -eq 1 ] ; then + echo SunOS sed prints only with -n + else + $SED 's/^/e1_/p' <lines1 + fi + mark '1.4' ; $SED -n 's/^/e1_/p' <lines1 + echo Second type + mark '1.4.1' + if [ $SUN -eq 1 ] ; then + echo SunOS sed fails this + fi + $SED -e '' <lines1 + echo 's/^/s1_/p' >script1 + echo 's/^/s2_/p' >script2 + mark '1.5' + if [ $SUN -eq 1 ] ; then + echo SunOS sed prints only with -n + else + $SED -f script1 lines1 + fi + mark '1.6' + if [ $SUN -eq 1 ] ; then + echo SunOS sed prints only with -n + else + $SED -f script1 <lines1 + fi + mark '1.7' + if [ $SUN -eq 1 ] ; then + echo SunOS sed prints only with -n + else + $SED -e 's/^/e1_/p' lines1 + fi + mark '1.8' + if [ $SUN -eq 1 ] ; then + echo SunOS sed prints only with -n + else + $SED -e 's/^/e1_/p' <lines1 + fi + mark '1.9' ; $SED -n -f script1 lines1 + mark '1.10' ; $SED -n -f script1 <lines1 + mark '1.11' ; $SED -n -e 's/^/e1_/p' lines1 + mark '1.12' + if [ $SUN -eq 1 ] ; then + echo SunOS sed prints only with -n + else + $SED -n -e 's/^/e1_/p' <lines1 + fi + mark '1.13' + if [ $SUN -eq 1 ] ; then + echo SunOS sed prints only with -n + else + $SED -e 's/^/e1_/p' -e 's/^/e2_/p' lines1 + fi + mark '1.14' + if [ $SUN -eq 1 ] ; then + echo SunOS sed prints only with -n + else + $SED -f script1 -f script2 lines1 + fi + mark '1.15' + if [ $GNU -eq 1 -o $SUN -eq 1 ] ; then + echo GNU and SunOS sed fail this following older POSIX draft + else + $SED -e 's/^/e1_/p' -f script1 lines1 + fi + mark '1.16' + if [ $SUN -eq 1 ] ; then + echo SunOS sed prints only with -n + else + $SED -e 's/^/e1_/p' lines1 lines1 + fi + # POSIX D11.2:11251 + mark '1.17' ; $SED p <lines1 lines1 +cat >script1 <<EOF +#n +# A comment + +p +EOF + mark '1.18' ; $SED -f script1 <lines1 lines1 +} + +test_addr() +{ + echo Testing address ranges + mark '2.1' ; $SED -n -e '4p' lines1 + mark '2.2' ; $SED -n -e '20p' lines1 lines2 + mark '2.3' ; $SED -n -e '$p' lines1 + mark '2.4' ; $SED -n -e '$p' lines1 lines2 + mark '2.5' ; $SED -n -e '$a\ +hello' /dev/null + mark '2.6' ; $SED -n -e '$p' lines1 /dev/null lines2 + # Should not print anything + mark '2.7' ; $SED -n -e '20p' lines1 + mark '2.8' ; $SED -n -e '0p' lines1 + mark '2.9' ; $SED -n '/l1_7/p' lines1 + mark '2.10' ; $SED -n ' /l1_7/ p' lines1 + mark '2.11' + if [ $BSD -eq 1 ] ; then + echo BSD sed fails this test + fi + if [ $GNU -eq 1 ] ; then + echo GNU sed fails this + fi + $SED -n '\_l1\_7_p' lines1 + mark '2.12' ; $SED -n '1,4p' lines1 + mark '2.13' ; $SED -n '1,$p' lines1 lines2 + mark '2.14' ; $SED -n '1,/l2_9/p' lines1 lines2 + mark '2.15' ; $SED -n '/4/,$p' lines1 lines2 + mark '2.16' ; $SED -n '/4/,20p' lines1 lines2 + mark '2.17' ; $SED -n '/4/,/10/p' lines1 lines2 + mark '2.18' ; $SED -n '/l2_3/,/l1_8/p' lines1 lines2 + mark '2.19' + if [ $GNU -eq 1 ] ; then + echo GNU sed fails this + fi + $SED -n '12,3p' lines1 lines2 + mark '2.20' + if [ $GNU -eq 1 ] ; then + echo GNU sed fails this + fi + $SED -n '/l1_7/,3p' lines1 lines2 +} + +test_group() +{ + echo Brace and other grouping + mark '3.1' ; $SED -e ' +4,12 { + s/^/^/ + s/$/$/ + s/_/T/ +}' lines1 + mark '3.2' ; $SED -e ' +4,12 { + s/^/^/ + /6/,/10/ { + s/$/$/ + /8/ s/_/T/ + } +}' lines1 + mark '3.3' ; $SED -e ' +4,12 !{ + s/^/^/ + /6/,/10/ !{ + s/$/$/ + /8/ !s/_/T/ + } +}' lines1 + mark '3.4' ; $SED -e '4,12!s/^/^/' lines1 +} + +test_acid() +{ + echo Testing a c d and i commands + mark '4.1' ; $SED -n -e ' +s/^/before_i/p +20i\ +inserted +s/^/after_i/p +' lines1 lines2 + mark '4.2' ; $SED -n -e ' +5,12s/^/5-12/ +s/^/before_a/p +/5-12/a\ +appended +s/^/after_a/p +' lines1 lines2 + mark '4.3' + if [ $GNU -eq 1 ] ; then + echo GNU sed fails this + fi + $SED -n -e ' +s/^/^/p +/l1_/a\ +appended +8,10N +s/$/$/p +' lines1 lines2 + mark '4.4' ; $SED -n -e ' +c\ +hello +' lines1 + mark '4.5' ; $SED -n -e ' +8c\ +hello +' lines1 + mark '4.6' ; $SED -n -e ' +3,14c\ +hello +' lines1 +# SunOS and GNU sed behave differently. We follow POSIX +# mark '4.7' ; $SED -n -e ' +#8,3c\ +#hello +#' lines1 + mark '4.8' ; $SED d <lines1 +} + +test_branch() +{ + echo Testing labels and branching + mark '5.1' ; $SED -n -e ' +b label4 +:label3 +s/^/label3_/p +b end +:label4 +2,12b label1 +b label2 +:label1 +s/^/label1_/p +b +:label2 +s/^/label2_/p +b label3 +:end +' lines1 + mark '5.2' + if [ $BSD -eq 1 ] ; then + echo BSD sed fails this test + fi + $SED -n -e ' +s/l1_/l2_/ +t ok +b +:ok +s/^/tested /p +' lines1 lines2 +# SunOS sed behaves differently here. Clarification needed. +# mark '5.3' ; $SED -n -e ' +#5,8b inside +#1,5 { +# s/^/^/p +# :inside +# s/$/$/p +#} +#' lines1 +# Check that t clears the substitution done flag + mark '5.4' ; $SED -n -e ' +1,8s/^/^/ +t l1 +:l1 +t l2 +s/$/$/p +b +:l2 +s/^/ERROR/ +' lines1 +# Check that reading a line clears the substitution done flag + mark '5.5' + if [ $BSD -eq 1 ] ; then + echo BSD sed fails this test + fi + $SED -n -e ' +t l2 +1,8s/^/^/p +2,7N +b +:l2 +s/^/ERROR/p +' lines1 + mark '5.6' ; $SED 5q lines1 + mark '5.7' ; $SED -e ' +5i\ +hello +5q' lines1 +# Branch across block boundary + mark '5.8' ; $SED -e ' +{ +:b +} +s/l/m/ +tb' lines1 +} + +test_pattern() +{ +echo Pattern space commands +# Check that the pattern space is deleted + mark '6.1' ; $SED -n -e ' +c\ +changed +p +' lines1 + mark '6.2' ; $SED -n -e ' +4d +p +' lines1 +# SunOS sed refused to print here +# mark '6.3' ; $SED -e ' +#N +#N +#N +#D +#P +#4p +#' lines1 + mark '6.4' ; $SED -e ' +2h +3H +4g +5G +6x +6p +6x +6p +' lines1 + mark '6.5' ; $SED -e '4n' lines1 + mark '6.6' ; $SED -n -e '4n' lines1 +} + +test_print() +{ + echo Testing print and file routines + awk 'END {for (i = 1; i < 256; i++) printf("%c", i);print "\n"}' \ + </dev/null >lines3 + # GNU and SunOS sed behave differently here + mark '7.1' + if [ $BSD -eq 1 ] ; then + echo 'BSD sed drops core on this one; TEST SKIPPED' + else + $SED -n l lines3 + fi + mark '7.2' ; $SED -e '/l2_/=' lines1 lines2 + rm -f lines4 + mark '7.3' ; $SED -e '3,12w lines4' lines1 + echo w results + cat lines4 + mark '7.4' ; $SED -e '4r lines2' lines1 + mark '7.5' ; $SED -e '5r /dev/dds' lines1 + mark '7.6' ; $SED -e '6r /dev/null' lines1 + mark '7.7' + if [ $BSD -eq 1 -o $GNU -eq 1 -o $SUN -eq 1 ] ; then + echo BSD, GNU and SunOS cannot pass this one + else + sed '200q' $DICT | sed 's$.*$s/^/&/w tmpdir/&$' >script1 + rm -rf tmpdir + mkdir tmpdir + $SED -f script1 lines1 + cat tmpdir/* + rm -rf tmpdir + fi + mark '7.8' + if [ $BSD -eq 1 ] ; then + echo BSD sed cannot pass 7.7 + else + echo line1 > lines3 + echo "" >> lines3 + $SED -n -e '$p' lines3 /dev/null + fi + +} + +test_subst() +{ + echo Testing substitution commands + mark '8.1' ; $SED -e 's/./X/g' lines1 + mark '8.2' ; $SED -e 's,.,X,g' lines1 +# GNU and SunOS sed thinks we are escaping . as wildcard, not as separator +# mark '8.3' ; $SED -e 's.\..X.g' lines1 +# POSIX does not say that this should work +# mark '8.4' ; $SED -e 's/[/]/Q/' lines1 + mark '8.4' ; $SED -e 's/[\/]/Q/' lines1 + mark '8.5' ; $SED -e 's_\__X_' lines1 + mark '8.6' ; $SED -e 's/./(&)/g' lines1 + mark '8.7' ; $SED -e 's/./(\&)/g' lines1 + mark '8.8' ; $SED -e 's/\(.\)\(.\)\(.\)/x\3x\2x\1/g' lines1 + mark '8.9' ; $SED -e 's/_/u0\ +u1\ +u2/g' lines1 + mark '8.10' + if [ $BSD -eq 1 -o $GNU -eq 1 ] ; then + echo 'BSD/GNU sed do not understand digit flags on s commands' + fi + $SED -e 's/./X/4' lines1 + rm -f lines4 + mark '8.11' ; $SED -e 's/1/X/w lines4' lines1 + echo s wfile results + cat lines4 + mark '8.12' ; $SED -e 's/[123]/X/g' lines1 + mark '8.13' ; $SED -e 'y/0123456789/9876543210/' lines1 + mark '8.14' ; + if [ $BSD -eq 1 -o $GNU -eq 1 -o $SUN -eq 1 ] ; then + echo BSD/GNU/SUN sed fail this test + else + $SED -e 'y10\123456789198765432\101' lines1 + fi + mark '8.15' ; $SED -e '1N;2y/\n/X/' lines1 + mark '8.16' + if [ $BSD -eq 1 ] ; then + echo 'BSD sed does not handle branch defined REs' + else + echo 'eeefff' | $SED -e 'p' -e 's/e/X/p' -e ':x' \ + -e 's//Y/p' -e '/f/bx' + fi +} + +test_error() +{ + exec 0>&3 4>&1 5>&2 + exec 0</dev/null + exec 2>&1 + set -x + $TEST -x && exit 1 + $TEST -f && exit 1 + $TEST -e && exit 1 + $TEST -f /dev/dds && exit 1 + $TEST p /dev/dds && exit 1 + $TEST -f /bin/sh && exit 1 + $TEST '{' && exit 1 + $TEST '{' && exit 1 + $TEST '/hello/' && exit 1 + $TEST '1,/hello/' && exit 1 + $TEST -e '-5p' && exit 1 + $TEST '/jj' && exit 1 + $TEST 'a hello' && exit 1 + $TEST 'a \ hello' && exit 1 + $TEST 'b foo' && exit 1 + $TEST 'd hello' && exit 1 + $TEST 's/aa' && exit 1 + $TEST 's/aa/' && exit 1 + $TEST 's/a/b' && exit 1 + $TEST 's/a/b/c/d' && exit 1 + $TEST 's/a/b/ 1 2' && exit 1 + $TEST 's/a/b/ 1 g' && exit 1 + $TEST 's/a/b/w' && exit 1 + $TEST 'y/aa' && exit 1 + $TEST 'y/aa/b/' && exit 1 + $TEST 'y/aa/' && exit 1 + $TEST 'y/a/b' && exit 1 + $TEST 'y/a/b/c/d' && exit 1 + $TEST '!' && exit 1 + $TEST supercalifrangolisticexprialidociussupercalifrangolisticexcius + set +x + exec 0>&3 1>&4 2>&5 +} + +main diff --git a/sed/TODO b/sed/TODO @@ -0,0 +1 @@ +workaround REG_STARTEND: http://www.polarhome.com/service/man/generic.php?qf=regex&tf=2&of=OpenBSD&sf=3 diff --git a/sed/compile.c b/sed/compile.c @@ -0,0 +1,861 @@ +/* $OpenBSD: compile.c,v 1.36 2014/10/08 04:19:08 deraadt Exp $ */ + +/*- + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "defs.h" +#include "extern.h" +#include "util.h" + +#define LHSZ 128 +#define LHMASK (LHSZ - 1) +static struct labhash { + struct labhash *lh_next; + u_int lh_hash; + struct s_command *lh_cmd; + int lh_ref; +} *labels[LHSZ]; + +static char *compile_addr(char *, struct s_addr *); +static char *compile_ccl(char **, char *); +static char *compile_delimited(char *, char *, int); +static char *compile_flags(char *, struct s_subst *); +static char *compile_re(char *, regex_t **); +static char *compile_subst(char *, struct s_subst *); +static char *compile_text(void); +static char *compile_tr(char *, char **); +static struct s_command + **compile_stream(struct s_command **); +static char *duptoeol(char *, char *, char **); +static void enterlabel(struct s_command *); +static struct s_command + *findlabel(char *); +static void fixuplabel(struct s_command *, struct s_command *); +static void uselabel(void); + +/* + * Command specification. This is used to drive the command parser. + */ +struct s_format { + char code; /* Command code */ + int naddr; /* Number of address args */ + enum e_args args; /* Argument type */ +}; + +static struct s_format cmd_fmts[] = { + {'{', 2, GROUP}, + {'}', 0, ENDGROUP}, + {'a', 1, TEXT}, + {'b', 2, BRANCH}, + {'c', 2, TEXT}, + {'d', 2, EMPTY}, + {'D', 2, EMPTY}, + {'g', 2, EMPTY}, + {'G', 2, EMPTY}, + {'h', 2, EMPTY}, + {'H', 2, EMPTY}, + {'i', 1, TEXT}, + {'l', 2, EMPTY}, + {'n', 2, EMPTY}, + {'N', 2, EMPTY}, + {'p', 2, EMPTY}, + {'P', 2, EMPTY}, + {'q', 1, EMPTY}, + {'r', 1, RFILE}, + {'s', 2, SUBST}, + {'t', 2, BRANCH}, + {'w', 2, WFILE}, + {'x', 2, EMPTY}, + {'y', 2, TR}, + {'!', 2, NONSEL}, + {':', 0, LABEL}, + {'#', 0, COMMENT}, + {'=', 1, EMPTY}, + {'\0', 0, COMMENT}, +}; + +/* The compiled program. */ +struct s_command *prog; + +/* + * Compile the program into prog. + * Initialise appends. + */ +void +compile(void) +{ + *compile_stream(&prog) = NULL; + fixuplabel(prog, NULL); + uselabel(); + appends = xreallocarray(NULL, appendnum, sizeof(struct s_appends)); + match = xreallocarray(NULL, maxnsub + 1, sizeof(regmatch_t)); +} + +#define EATSPACE() do { \ + if (p) \ + while (isascii((unsigned char)*p) && \ + isspace((unsigned char)*p)) \ + p++; \ + } while (0) + +static struct s_command ** +compile_stream(struct s_command **link) +{ + char *p; + static char *lbuf; /* To avoid excessive malloc calls */ + static size_t bufsize; + struct s_command *cmd, *cmd2, *stack; + struct s_format *fp; + int naddr; /* Number of addresses */ + + stack = 0; + for (;;) { + if ((p = cu_fgets(&lbuf, &bufsize)) == NULL) { + if (stack != 0) + err(COMPILE, "unexpected EOF (pending }'s)"); + return (link); + } + +semicolon: EATSPACE(); + if (*p == '#' || *p == '\0') + continue; + if (*p == ';') { + p++; + goto semicolon; + } + *link = cmd = xmalloc(sizeof(struct s_command)); + link = &cmd->next; + cmd->nonsel = cmd->inrange = 0; + /* First parse the addresses */ + naddr = 0; + +/* Valid characters to start an address */ +#define addrchar(c) (strchr("0123456789/\\$", (c))) + if (addrchar(*p)) { + naddr++; + cmd->a1 = xmalloc(sizeof(struct s_addr)); + p = compile_addr(p, cmd->a1); + EATSPACE(); /* EXTENSION */ + if (*p == ',') { + p++; + EATSPACE(); /* EXTENSION */ + naddr++; + cmd->a2 = xmalloc(sizeof(struct s_addr)); + p = compile_addr(p, cmd->a2); + EATSPACE(); + } else { + cmd->a2 = 0; + } + } else { + cmd->a1 = cmd->a2 = 0; + } + +nonsel: /* Now parse the command */ + if (!*p) + err(COMPILE, "command expected"); + cmd->code = *p; + for (fp = cmd_fmts; fp->code; fp++) + if (fp->code == *p) + break; + if (!fp->code) + err(COMPILE, "invalid command code %c", *p); + if (naddr > fp->naddr) + err(COMPILE, + "command %c expects up to %d address(es), found %d", + *p, fp->naddr, naddr); + switch (fp->args) { + case NONSEL: /* ! */ + p++; + EATSPACE(); + cmd->nonsel = ! cmd->nonsel; + goto nonsel; + case GROUP: /* { */ + p++; + EATSPACE(); + cmd->next = stack; + stack = cmd; + link = &cmd->u.c; + if (*p) + goto semicolon; + break; + case ENDGROUP: + /* + * Short-circuit command processing, since end of + * group is really just a noop. + */ + cmd->nonsel = 1; + if (stack == 0) + err(COMPILE, "unexpected }"); + cmd2 = stack; + stack = cmd2->next; + cmd2->next = cmd; + /*FALLTHROUGH*/ + case EMPTY: /* d D g G h H l n N p P q x = \0 */ + p++; + EATSPACE(); + if (*p == ';') { + p++; + link = &cmd->next; + goto semicolon; + } + if (*p) + err(COMPILE, +"extra characters at the end of %c command", cmd->code); + break; + case TEXT: /* a c i */ + p++; + EATSPACE(); + if (*p != '\\') + err(COMPILE, "command %c expects \\ followed by" + " text", cmd->code); + p++; + EATSPACE(); + if (*p) + err(COMPILE, "extra characters after \\ at the" + " end of %c command", cmd->code); + cmd->t = compile_text(); + break; + case COMMENT: /* \0 # */ + break; + case WFILE: /* w */ + p++; + EATSPACE(); + if (*p == '\0') + err(COMPILE, "filename expected"); + cmd->t = duptoeol(p, "w command", NULL); + if (aflag) + cmd->u.fd = -1; + else if ((cmd->u.fd = open(p, + O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, + DEFFILEMODE)) == -1) + err(FATAL, "%s: %s", p, strerror(errno)); + break; + case RFILE: /* r */ + p++; + EATSPACE(); + cmd->t = duptoeol(p, "read command", NULL); + break; + case BRANCH: /* b t */ + p++; + EATSPACE(); + if (*p == '\0') + cmd->t = NULL; + else + cmd->t = duptoeol(p, "branch", &p); + if (*p == ';') { + p++; + goto semicolon; + } + break; + case LABEL: /* : */ + p++; + EATSPACE(); + cmd->t = duptoeol(p, "label", &p); + if (strlen(cmd->t) == 0) + err(COMPILE, "empty label"); + enterlabel(cmd); + if (*p == ';') { + p++; + goto semicolon; + } + break; + case SUBST: /* s */ + p++; + if (*p == '\0' || *p == '\\') + err(COMPILE, "substitute pattern can not be" + " delimited by newline or backslash"); + cmd->u.s = xmalloc(sizeof(struct s_subst)); + p = compile_re(p, &cmd->u.s->re); + if (p == NULL) + err(COMPILE, "unterminated substitute pattern"); + --p; + p = compile_subst(p, cmd->u.s); + p = compile_flags(p, cmd->u.s); + EATSPACE(); + if (*p == ';') { + p++; + link = &cmd->next; + goto semicolon; + } + break; + case TR: /* y */ + p++; + p = compile_tr(p, (char **)&cmd->u.y); + EATSPACE(); + if (*p == ';') { + p++; + link = &cmd->next; + goto semicolon; + } + if (*p) + err(COMPILE, "extra text at the end of a" + " transform command"); + break; + } + } +} + +/* + * Get a delimited string. P points to the delimeter of the string; d points + * to a buffer area. Newline and delimiter escapes are processed; other + * escapes are ignored. + * + * Returns a pointer to the first character after the final delimiter or NULL + * in the case of a non-terminated string. The character array d is filled + * with the processed string. + */ +static char * +compile_delimited(char *p, char *d, int is_tr) +{ + char c; + + c = *p++; + if (c == '\0') + return (NULL); + else if (c == '\\') + err(COMPILE, "\\ can not be used as a string delimiter"); + else if (c == '\n') + err(COMPILE, "newline can not be used as a string delimiter"); + while (*p) { + if (*p == '[' && *p != c) { + if ((d = compile_ccl(&p, d)) == NULL) + err(COMPILE, "unbalanced brackets ([])"); + continue; + } else if (*p == '\\' && p[1] == '[') { + *d++ = *p++; + } else if (*p == '\\' && p[1] == c) { + p++; + } else if (*p == '\\' && p[1] == 'n') { + *d++ = '\n'; + p += 2; + continue; + } else if (*p == '\\' && p[1] == '\\') { + if (is_tr) + p++; + else + *d++ = *p++; + } else if (*p == c) { + *d = '\0'; + return (p + 1); + } + *d++ = *p++; + } + return (NULL); +} + + +/* compile_ccl: expand a POSIX character class */ +static char * +compile_ccl(char **sp, char *t) +{ + int c, d; + char *s = *sp; + + *t++ = *s++; + if (*s == '^') + *t++ = *s++; + if (*s == ']') + *t++ = *s++; + for (; *s && (*t = *s) != ']'; s++, t++) + if (*s == '[' && ((d = *(s+1)) == '.' || d == ':' || d == '=')) { + *++t = *++s, t++, s++; + for (c = *s; (*t = *s) != ']' || c != d; s++, t++) + if ((c = *s) == '\0') + return NULL; + } else if (*s == '\\' && s[1] == 'n') { + *t = '\n'; + s++; + } + if (*s == ']') { + *sp = ++s; + return (++t); + } else { + return (NULL); + } +} + +/* + * Get a regular expression. P points to the delimiter of the regular + * expression; repp points to the address of a regexp pointer. Newline + * and delimiter escapes are processed; other escapes are ignored. + * Returns a pointer to the first character after the final delimiter + * or NULL in the case of a non terminated regular expression. The regexp + * pointer is set to the compiled regular expression. + * Cflags are passed to regcomp. + */ +static char * +compile_re(char *p, regex_t **repp) +{ + int eval; + char *re; + + re = xmalloc(strlen(p) + 1); /* strlen(re) <= strlen(p) */ + p = compile_delimited(p, re, 0); + if (p && strlen(re) == 0) { + *repp = NULL; + free(re); + return (p); + } + *repp = xmalloc(sizeof(regex_t)); + if (p && (eval = regcomp(*repp, re, Eflag ? REG_EXTENDED : 0)) != 0) + err(COMPILE, "RE error: %s", strregerror(eval, *repp)); + if (maxnsub < (*repp)->re_nsub) + maxnsub = (*repp)->re_nsub; + free(re); + return (p); +} + +/* + * Compile the substitution string of a regular expression and set res to + * point to a saved copy of it. Nsub is the number of parenthesized regular + * expressions. + */ +static char * +compile_subst(char *p, struct s_subst *s) +{ + static char *lbuf; + static size_t bufsize; + int asize, ref, size; + char c, *text, *op, *sp; + int sawesc = 0; + + c = *p++; /* Terminator character */ + if (c == '\0') + return (NULL); + + s->maxbref = 0; + s->linenum = linenum; + text = NULL; + asize = size = 0; + do { + size_t len = ROUNDLEN(strlen(p) + 1); + if (asize - size < len) { + do { + asize += len; + } while (asize - size < len); + text = xrealloc(text, asize); + } + op = sp = text + size; + for (; *p; p++) { + if (*p == '\\' || sawesc) { + /* + * If this is a continuation from the last + * buffer, we won't have a character to + * skip over. + */ + if (sawesc) + sawesc = 0; + else + p++; + + if (*p == '\0') { + /* + * This escaped character is continued + * in the next part of the line. Note + * this fact, then cause the loop to + * exit w/ normal EOL case and reenter + * above with the new buffer. + */ + sawesc = 1; + p--; + continue; + } else if (strchr("123456789", *p) != NULL) { + *sp++ = '\\'; + ref = *p - '0'; + if (s->re != NULL && + ref > s->re->re_nsub) + err(COMPILE, +"\\%c not defined in the RE", *p); + if (s->maxbref < ref) + s->maxbref = ref; + } else if (*p == '&' || *p == '\\') + *sp++ = '\\'; + } else if (*p == c) { + p++; + *sp++ = '\0'; + size += sp - op; + s->new = xrealloc(text, size); + return (p); + } else if (*p == '\n') { + err(COMPILE, +"unescaped newline inside substitute pattern"); + /* NOTREACHED */ + } + *sp++ = *p; + } + size += sp - op; + } while ((p = cu_fgets(&lbuf, &bufsize))); + err(COMPILE, "unterminated substitute in regular expression"); + /* NOTREACHED */ + return NULL; +} + +/* + * Compile the flags of the s command + */ +static char * +compile_flags(char *p, struct s_subst *s) +{ + int gn; /* True if we have seen g or n */ + long l; + char wfile[PATH_MAX], *q; + + s->n = 1; /* Default */ + s->p = 0; + s->wfile = NULL; + s->wfd = -1; + for (gn = 0;;) { + EATSPACE(); /* EXTENSION */ + switch (*p) { + case 'g': + if (gn) + err(COMPILE, "more than one number or 'g' in" + " substitute flags"); + gn = 1; + s->n = 0; + break; + case '\0': + case '\n': + case ';': + return (p); + case 'p': + s->p = 1; + break; + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + if (gn) + err(COMPILE, "more than one number or 'g' in" + " substitute flags"); + gn = 1; + l = strtol(p, &p, 10); + if (l <= 0 || l >= INT_MAX) + err(COMPILE, + "number in substitute flags out of range"); + s->n = (int)l; + continue; + case 'w': + p++; +#ifdef HISTORIC_PRACTICE + if (*p != ' ') { + err(WARNING, "space missing before w wfile"); + return (p); + } +#endif + EATSPACE(); + q = wfile; + while (*p) { + if (*p == '\n') + break; + *q++ = *p++; + } + *q = '\0'; + if (q == wfile) + err(COMPILE, "no wfile specified"); + s->wfile = strdup(wfile); + if (!aflag && (s->wfd = open(wfile, + O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, + DEFFILEMODE)) == -1) + err(FATAL, "%s: %s", wfile, strerror(errno)); + return (p); + default: + err(COMPILE, + "bad flag in substitute command: '%c'", *p); + break; + } + p++; + } +} + +/* + * Compile a translation set of strings into a lookup table. + */ +static char * +compile_tr(char *p, char **transtab) +{ + int i; + char *lt, *op, *np; + char *old = NULL, *new = NULL; + + if (*p == '\0' || *p == '\\') + err(COMPILE, +"transform pattern can not be delimited by newline or backslash"); + old = xmalloc(strlen(p) + 1); + p = compile_delimited(p, old, 1); + if (p == NULL) { + err(COMPILE, "unterminated transform source string"); + goto bad; + } + new = xmalloc(strlen(p) + 1); + p = compile_delimited(--p, new, 1); + if (p == NULL) { + err(COMPILE, "unterminated transform target string"); + goto bad; + } + EATSPACE(); + if (strlen(new) != strlen(old)) { + err(COMPILE, "transform strings are not the same length"); + goto bad; + } + /* We assume characters are 8 bits */ + lt = xmalloc(UCHAR_MAX + 1); + for (i = 0; i <= UCHAR_MAX; i++) + lt[i] = (char)i; + for (op = old, np = new; *op; op++, np++) + lt[(u_char)*op] = *np; + *transtab = lt; + free(old); + free(new); + return (p); +bad: + free(old); + free(new); + return (NULL); +} + +/* + * Compile the text following an a, c, or i command. + */ +static char * +compile_text(void) +{ + int asize, esc_nl, size; + char *lbuf, *text, *p, *op, *s; + size_t bufsize; + + lbuf = text = NULL; + asize = size = 0; + while ((p = cu_fgets(&lbuf, &bufsize))) { + size_t len = ROUNDLEN(strlen(p) + 1); + if (asize - size < len) { + do { + asize += len; + } while (asize - size < len); + text = xrealloc(text, asize); + } + op = s = text + size; + for (esc_nl = 0; *p != '\0'; p++) { + if (*p == '\\' && p[1] != '\0' && *++p == '\n') + esc_nl = 1; + *s++ = *p; + } + size += s - op; + if (!esc_nl) { + *s = '\0'; + break; + } + } + free(lbuf); + text = xrealloc(text, size + 1); + text[size] = '\0'; + return (text); +} + +/* + * Get an address and return a pointer to the first character after + * it. Fill the structure pointed to according to the address. + */ +static char * +compile_addr(char *p, struct s_addr *a) +{ + char *end; + + switch (*p) { + case '\\': /* Context address */ + ++p; + /* FALLTHROUGH */ + case '/': /* Context address */ + p = compile_re(p, &a->u.r); + if (p == NULL) + err(COMPILE, "unterminated regular expression"); + a->type = AT_RE; + return (p); + + case '$': /* Last line */ + a->type = AT_LAST; + return (p + 1); + /* Line number */ + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + a->type = AT_LINE; + a->u.l = strtoul(p, &end, 10); + return (end); + default: + err(COMPILE, "expected context address"); + return (NULL); + } +} + +/* + * duptoeol -- + * Return a copy of all the characters up to \n or \0. + */ +static char * +duptoeol(char *s, char *ctype, char **semi) +{ + size_t len; + int ws; + char *start; + + ws = 0; + if (semi) { + for (start = s; *s != '\0' && *s != '\n' && *s != ';'; ++s) + ws = isspace((unsigned char)*s); + } else { + for (start = s; *s != '\0' && *s != '\n'; ++s) + ws = isspace((unsigned char)*s); + *s = '\0'; + } + if (ws) + err(WARNING, "whitespace after %s", ctype); + len = s - start + 1; + if (semi) + *semi = s; + s = xmalloc(len); + strlcpy(s, start, len); + return (s); +} + +/* + * Convert goto label names to addresses, and count a and r commands, in + * the given subset of the script. Free the memory used by labels in b + * and t commands (but not by :). + * + * TODO: Remove } nodes + */ +static void +fixuplabel(struct s_command *cp, struct s_command *end) +{ + + for (; cp != end; cp = cp->next) + switch (cp->code) { + case 'a': + case 'r': + appendnum++; + break; + case 'b': + case 't': + /* Resolve branch target. */ + if (cp->t == NULL) { + cp->u.c = NULL; + break; + } + if ((cp->u.c = findlabel(cp->t)) == NULL) + err(COMPILE2, "undefined label '%s'", cp->t); + free(cp->t); + break; + case '{': + /* Do interior commands. */ + fixuplabel(cp->u.c, cp->next); + break; + } +} + +/* + * Associate the given command label for later lookup. + */ +static void +enterlabel(struct s_command *cp) +{ + struct labhash **lhp, *lh; + u_char *p; + u_int h, c; + + for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++) + h = (h << 5) + h + c; + lhp = &labels[h & LHMASK]; + for (lh = *lhp; lh != NULL; lh = lh->lh_next) + if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) + err(COMPILE2, "duplicate label '%s'", cp->t); + lh = xmalloc(sizeof *lh); + lh->lh_next = *lhp; + lh->lh_hash = h; + lh->lh_cmd = cp; + lh->lh_ref = 0; + *lhp = lh; +} + +/* + * Find the label contained in the command l in the command linked + * list cp. L is excluded from the search. Return NULL if not found. + */ +static struct s_command * +findlabel(char *name) +{ + struct labhash *lh; + u_char *p; + u_int h, c; + + for (h = 0, p = (u_char *)name; (c = *p) != 0; p++) + h = (h << 5) + h + c; + for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) { + if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { + lh->lh_ref = 1; + return (lh->lh_cmd); + } + } + return (NULL); +} + +/* + * Warn about any unused labels. As a side effect, release the label hash + * table space. + */ +static void +uselabel(void) +{ + struct labhash *lh, *next; + int i; + + for (i = 0; i < LHSZ; i++) { + for (lh = labels[i]; lh != NULL; lh = next) { + next = lh->lh_next; + if (!lh->lh_ref) + err(WARNING, "unused label '%s'", + lh->lh_cmd->t); + free(lh); + } + } +} diff --git a/sed/defs.h b/sed/defs.h @@ -0,0 +1,148 @@ +/* * $OpenBSD: defs.h,v 1.4 2008/10/16 16:34:32 millert Exp $*/ +/*- + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)defs.h 8.1 (Berkeley) 6/6/93 + */ + +/* + * Types of address specifications + */ +enum e_atype { + AT_RE, /* Line that match RE */ + AT_LINE, /* Specific line */ + AT_LAST, /* Last line */ +}; + +/* + * Format of an address + */ +struct s_addr { + enum e_atype type; /* Address type */ + union { + u_long l; /* Line number */ + regex_t *r; /* Regular expression */ + } u; +}; + +/* + * Substitution command + */ +struct s_subst { + int n; /* Occurrence to subst. */ + int p; /* True if p flag */ + char *wfile; /* NULL if no wfile */ + int wfd; /* Cached file descriptor */ + regex_t *re; /* Regular expression */ + int maxbref; /* Largest backreference. */ + u_long linenum; /* Line number. */ + char *new; /* Replacement text */ +}; + + +/* + * An internally compiled command. + * Initialy, label references are stored in t, on a second pass they + * are updated to pointers. + */ +struct s_command { + struct s_command *next; /* Pointer to next command */ + struct s_addr *a1, *a2; /* Start and end address */ + char *t; /* Text for : a c i r w */ + union { + struct s_command *c; /* Command(s) for b t { */ + struct s_subst *s; /* Substitute command */ + u_char *y; /* Replace command array */ + int fd; /* File descriptor for w */ + } u; + char code; /* Command code */ + u_int nonsel:1; /* True if ! */ + u_int inrange:1; /* True if in range */ +}; + +/* + * Types of command arguments recognised by the parser + */ +enum e_args { + EMPTY, /* d D g G h H l n N p P q x = \0 */ + TEXT, /* a c i */ + NONSEL, /* ! */ + GROUP, /* { */ + ENDGROUP, /* } */ + COMMENT, /* # */ + BRANCH, /* b t */ + LABEL, /* : */ + RFILE, /* r */ + WFILE, /* w */ + SUBST, /* s */ + TR /* y */ +}; + +/* + * Structure containing things to append before a line is read + */ +struct s_appends { + enum {AP_STRING, AP_FILE} type; + char *s; + size_t len; +}; + +enum e_spflag { + APPEND, /* Append to the contents. */ + REPLACE, /* Replace the contents. */ +}; + +/* + * Structure for a space (process, hold, otherwise). + */ +typedef struct { + char *space; /* Current space pointer. */ + size_t len; /* Current length. */ + int deleted; /* If deleted. */ + char *back; /* Backing memory. */ + size_t blen; /* Backing memory length. */ +} SPACE; + +/* + * Error severity codes: + */ +#define FATAL 0 /* Exit immediately with 1 */ +#define ERROR 1 /* Continue, but change exit value */ +#define WARNING 2 /* Just print the warning */ +#define COMPILE 3 /* Print error, count and finish script */ +#define COMPILE2 3 /* Print error, count and finish script */ + +/* + * Round up to the nearest multiple of _POSIX2_LINE_MAX + */ +#define ROUNDLEN(x) \ + (((x) + _POSIX2_LINE_MAX - 1) & ~(_POSIX2_LINE_MAX - 1)) diff --git a/sed/extern.h b/sed/extern.h @@ -0,0 +1,57 @@ +/* * $OpenBSD: extern.h,v 1.7 2014/10/08 04:19:08 deraadt Exp $*/ +/*- + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)extern.h 8.1 (Berkeley) 6/6/93 + */ + +extern struct s_command *prog; +extern struct s_appends *appends; +extern regmatch_t *match; +extern size_t maxnsub; +extern u_long linenum; +extern int appendnum; +extern int lastline; +extern int Eflag, aflag, eflag, nflag; +extern char *fname; + +void cfclose(struct s_command *, struct s_command *); +void compile(void); +void cspace(SPACE *, char *, size_t, enum e_spflag); +char *cu_fgets(char **, size_t *); +void err(int, const char *, ...); +int mf_fgets(SPACE *, enum e_spflag); +void process(void); +char *strregerror(int, regex_t *); +void *xmalloc(size_t); +void *xreallocarray(void *, size_t, size_t); +void *xrealloc(void *, size_t); diff --git a/sed/fgetln.c b/sed/fgetln.c @@ -0,0 +1,69 @@ +/* + * Copyright © 2005 Hector Garcia Alvarez + * Copyright © 2005, 2008-2012 Guillem Jover <guillem@hadrons.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <sys/cdefs.h> +#include <sys/types.h> +#include <string.h> + +struct filebuf { + FILE *fp; + char *buf; + size_t len; +}; + +#define FILEBUF_POOL_ITEMS 32 + +static struct filebuf fb_pool[FILEBUF_POOL_ITEMS]; +static int fb_pool_cur; + +char * +fgetln(FILE *stream, size_t *len) +{ + struct filebuf *fb; + ssize_t nread; + + /* Try to diminish the possibility of several fgetln() calls being + * used on different streams, by using a pool of buffers per file. */ + fb = &fb_pool[fb_pool_cur]; + if (fb->fp != stream && fb->fp != NULL) { + fb_pool_cur++; + fb_pool_cur %= FILEBUF_POOL_ITEMS; + fb = &fb_pool[fb_pool_cur]; + } + fb->fp = stream; + + nread = getline(&fb->buf, &fb->len, stream); + /* Note: the getdelim/getline API ensures nread != 0. */ + if (nread == -1) { + *len = 0; + return NULL; + } else { + *len = (size_t)nread; + return fb->buf; + } +} diff --git a/sed/main.c b/sed/main.c @@ -0,0 +1,358 @@ +/* $OpenBSD: main.c,v 1.17 2009/10/27 23:59:43 deraadt Exp $ */ + +/*- + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <regex.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "defs.h" +#include "extern.h" +#include "util.h" + +/* + * Linked list of units (strings and files) to be compiled + */ +struct s_compunit { + struct s_compunit *next; + enum e_cut {CU_FILE, CU_STRING} type; + char *s; /* Pointer to string or fname */ +}; + +/* + * Linked list pointer to compilation units and pointer to current + * next pointer. + */ +static struct s_compunit *script, **cu_nextp = &script; + +/* + * Linked list of files to be processed + */ +struct s_flist { + char *fname; + struct s_flist *next; +}; + +/* + * Linked list pointer to files and pointer to current + * next pointer. + */ +static struct s_flist *files, **fl_nextp = &files; + +int Eflag, aflag, eflag, nflag; + +/* + * Current file and line number; line numbers restart across compilation + * units, but span across input files. + */ +char *fname; /* File name. */ +u_long linenum; +int lastline; /* TRUE on the last line of the last file */ + +static void add_compunit(enum e_cut, char *); +static void add_file(char *); + +int +main(int argc, char *argv[]) +{ + int c, fflag; + + fflag = 0; + while ((c = getopt(argc, argv, "Eae:f:nru")) != -1) + switch (c) { + case 'E': + case 'r': + Eflag = 1; + break; + case 'a': + aflag = 1; + break; + case 'e': + eflag = 1; + add_compunit(CU_STRING, optarg); + break; + case 'f': + fflag = 1; + add_compunit(CU_FILE, optarg); + break; + case 'n': + nflag = 1; + break; + case 'u': + setlinebuf(stdout); + break; + default: + case '?': + (void)fprintf(stderr, + "usage: sed [-aEnru] command [file ...]\n" + " sed [-aEnru] [-e command] [-f command_file] [file ...]\n"); + exit(1); + } + argc -= optind; + argv += optind; + + /* First usage case; script is the first arg */ + if (!eflag && !fflag && *argv) { + add_compunit(CU_STRING, *argv); + argv++; + } + + compile(); + + /* Continue with first and start second usage */ + if (*argv) + for (; *argv; argv++) + add_file(*argv); + else + add_file(NULL); + process(); + cfclose(prog, NULL); + if (fclose(stdout)) + err(FATAL, "stdout: %s", strerror(errno)); + exit (0); +} + +/* + * Like fgets, but go through the chain of compilation units chaining them + * together. Empty strings and files are ignored. + */ +char * +cu_fgets(char **outbuf, size_t *outsize) +{ + static enum {ST_EOF, ST_FILE, ST_STRING} state = ST_EOF; + static FILE *f; /* Current open file */ + static char *s; /* Current pointer inside string */ + static char string_ident[30]; + size_t len; + char *p; + + if (*outbuf == NULL) + *outsize = 0; + +again: + switch (state) { + case ST_EOF: + if (script == NULL) + return (NULL); + linenum = 0; + switch (script->type) { + case CU_FILE: + if ((f = fopen(script->s, "r")) == NULL) + err(FATAL, + "%s: %s", script->s, strerror(errno)); + fname = script->s; + state = ST_FILE; + goto again; + case CU_STRING: + if ((snprintf(string_ident, + sizeof(string_ident), "\"%s\"", script->s)) >= + sizeof(string_ident)) + strlcpy(string_ident + + sizeof(string_ident) - 6, " ...\"", 5); + fname = string_ident; + s = script->s; + state = ST_STRING; + goto again; + } + case ST_FILE: + if ((p = fgetln(f, &len)) != NULL) { + linenum++; + if (len >= *outsize) { + free(*outbuf); + *outsize = ROUNDLEN(len + 1); + *outbuf = xmalloc(*outsize); + } + memcpy(*outbuf, p, len); + (*outbuf)[len] = '\0'; + if (linenum == 1 && p[0] == '#' && p[1] == 'n') + nflag = 1; + return (*outbuf); + } + script = script->next; + (void)fclose(f); + state = ST_EOF; + goto again; + case ST_STRING: + if (linenum == 0 && s[0] == '#' && s[1] == 'n') + nflag = 1; + p = *outbuf; + len = *outsize; + for (;;) { + if (len <= 1) { + *outbuf = xrealloc(*outbuf, + *outsize + _POSIX2_LINE_MAX); + p = *outbuf + *outsize - len; + len += _POSIX2_LINE_MAX; + *outsize += _POSIX2_LINE_MAX; + } + switch (*s) { + case '\0': + state = ST_EOF; + if (s == script->s) { + script = script->next; + goto again; + } else { + script = script->next; + *p = '\0'; + linenum++; + return (*outbuf); + } + case '\n': + *p++ = '\n'; + *p = '\0'; + s++; + linenum++; + return (*outbuf); + default: + *p++ = *s++; + len--; + } + } + } + /* NOTREACHED */ + return NULL; +} + +/* + * Like fgets, but go through the list of files chaining them together. + * Set len to the length of the line. + */ +int +mf_fgets(SPACE *sp, enum e_spflag spflag) +{ + static FILE *f; /* Current open file */ + size_t len; + char *p; + int c; + + if (f == NULL) + /* Advance to first non-empty file */ + for (;;) { + if (files == NULL) { + lastline = 1; + return (0); + } + if (files->fname == NULL) { + f = stdin; + fname = "stdin"; + } else { + fname = files->fname; + if ((f = fopen(fname, "r")) == NULL) + err(FATAL, "%s: %s", + fname, strerror(errno)); + } + if ((c = getc(f)) != EOF) { + (void)ungetc(c, f); + break; + } + (void)fclose(f); + files = files->next; + } + + if (lastline) { + sp->len = 0; + return (0); + } + + /* + * Use fgetln so that we can handle essentially infinite input data. + * Can't use the pointer into the stdio buffer as the process space + * because the ungetc() can cause it to move. + */ + p = fgetln(f, &len); + if (ferror(f)) + err(FATAL, "%s: %s", fname, strerror(errno ? errno : EIO)); + cspace(sp, p, len, spflag); + + linenum++; + /* Advance to next non-empty file */ + while ((c = getc(f)) == EOF) { + (void)fclose(f); + files = files->next; + if (files == NULL) { + lastline = 1; + return (1); + } + if (files->fname == NULL) { + f = stdin; + fname = "stdin"; + } else { + fname = files->fname; + if ((f = fopen(fname, "r")) == NULL) + err(FATAL, "%s: %s", fname, strerror(errno)); + } + } + (void)ungetc(c, f); + return (1); +} + +/* + * Add a compilation unit to the linked list + */ +static void +add_compunit(enum e_cut type, char *s) +{ + struct s_compunit *cu; + + cu = xmalloc(sizeof(struct s_compunit)); + cu->type = type; + cu->s = s; + cu->next = NULL; + *cu_nextp = cu; + cu_nextp = &cu->next; +} + +/* + * Add a file to the linked list + */ +static void +add_file(char *s) +{ + struct s_flist *fp; + + fp = xmalloc(sizeof(struct s_flist)); + fp->next = NULL; + *fl_nextp = fp; + fp->fname = s; + fl_nextp = &fp->next; +} diff --git a/sed/misc.c b/sed/misc.c @@ -0,0 +1,124 @@ +/* $OpenBSD: misc.c,v 1.10 2014/10/08 04:19:08 deraadt Exp $ */ + +/*- + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> + +#include <errno.h> +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> + +#include "defs.h" +#include "extern.h" +#include "util.h" + +/* + * malloc with result test + */ +void * +xmalloc(size_t size) +{ + void *p; + + if ((p = malloc(size)) == NULL) + err(FATAL, "%s", strerror(errno)); + return (p); +} + +void * +xreallocarray(void *o, size_t nmemb, size_t size) +{ + void *p; + + if ((p = reallocarray(o, nmemb, size)) == NULL) + err(FATAL, "%s", strerror(errno)); + return (p); +} + +/* + * realloc with result test + */ +void * +xrealloc(void *p, size_t size) +{ + + if ((p = realloc(p, size)) == NULL) + err(FATAL, "%s", strerror(errno)); + return (p); +} + +/* + * Return a string for a regular expression error passed. This is a overkill, + * because of the silly semantics of regerror (we can never know the size of + * the buffer). + */ +char * +strregerror(int errcode, regex_t *preg) +{ + static char *oe; + size_t s; + + free(oe); + s = regerror(errcode, preg, "", 0); + oe = xmalloc(s); + (void)regerror(errcode, preg, oe, s); + return (oe); +} + +/* + * Error reporting function + */ +void +err(int severity, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + (void)fprintf(stderr, "sed: "); + switch (severity) { + case WARNING: + case COMPILE: + (void)fprintf(stderr, "%lu: %s: ", linenum, fname); + } + (void)vfprintf(stderr, fmt, ap); + va_end(ap); + (void)fprintf(stderr, "\n"); + if (severity == WARNING) + return; + exit(1); + /* NOTREACHED */ +} diff --git a/sed/process.c b/sed/process.c @@ -0,0 +1,625 @@ +/* $OpenBSD: process.c,v 1.19 2013/11/28 18:24:55 deraadt Exp $ */ + +/*- + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <sys/uio.h> + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "defs.h" +#include "extern.h" +#include "util.h" + +static SPACE HS, PS, SS; +#define pd PS.deleted +#define ps PS.space +#define psl PS.len +#define hs HS.space +#define hsl HS.len + +static inline int applies(struct s_command *); +static void flush_appends(void); +static void lputs(char *); +static inline int regexec_e(regex_t *, char *, int, int, size_t); +static void regsub(SPACE *, char *, char *); +static int substitute(struct s_command *); + +struct s_appends *appends; /* Array of pointers to strings to append. */ +static int appendx; /* Index into appends array. */ +int appendnum; /* Size of appends array. */ + +static int lastaddr; /* Set by applies if last address of a range. */ +static int sdone; /* If any substitutes since last line input. */ + /* Iov structure for 'w' commands. */ +static regex_t *defpreg; +size_t maxnsub; +regmatch_t *match; + +#define OUT(s) do { fwrite(s, sizeof(u_char), psl, stdout); } while (0) + +void +process(void) +{ + struct s_command *cp; + SPACE tspace; + size_t len, oldpsl; + char *p; + + for (linenum = 0; mf_fgets(&PS, REPLACE);) { + pd = 0; +top: + cp = prog; +redirect: + while (cp != NULL) { + if (!applies(cp)) { + cp = cp->next; + continue; + } + switch (cp->code) { + case '{': + cp = cp->u.c; + goto redirect; + case 'a': + if (appendx >= appendnum) { + appends = xrealloc(appends, + sizeof(struct s_appends) * + (appendnum * 2)); + appendnum *= 2; + } + appends[appendx].type = AP_STRING; + appends[appendx].s = cp->t; + appends[appendx].len = strlen(cp->t); + appendx++; + break; + case 'b': + cp = cp->u.c; + goto redirect; + case 'c': + pd = 1; + psl = 0; + if (cp->a2 == NULL || lastaddr) + (void)printf("%s", cp->t); + break; + case 'd': + pd = 1; + goto new; + case 'D': + if (pd) + goto new; + if (psl == 0 || + (p = memchr(ps, '\n', psl - 1)) == NULL) { + pd = 1; + goto new; + } else { + psl -= (p + 1) - ps; + memmove(ps, p + 1, psl); + goto top; + } + case 'g': + cspace(&PS, hs, hsl, REPLACE); + break; + case 'G': + if (hs == NULL) + cspace(&HS, "\n", 1, REPLACE); + cspace(&PS, hs, hsl, 0); + break; + case 'h': + cspace(&HS, ps, psl, REPLACE); + break; + case 'H': + cspace(&HS, ps, psl, 0); + break; + case 'i': + (void)printf("%s", cp->t); + break; + case 'l': + lputs(ps); + break; + case 'n': + if (!nflag && !pd) + OUT(ps); + flush_appends(); + if (!mf_fgets(&PS, REPLACE)) + exit(0); + pd = 0; + break; + case 'N': + flush_appends(); + if (!mf_fgets(&PS, 0)) { + if (!nflag && !pd) + OUT(ps); + exit(0); + } + break; + case 'p': + if (pd) + break; + OUT(ps); + break; + case 'P': + if (pd) + break; + if (psl != 0 && + (p = memchr(ps, '\n', psl - 1)) != NULL) { + oldpsl = psl; + psl = (p + 1) - ps; + } + OUT(ps); + if (p != NULL) + psl = oldpsl; + break; + case 'q': + if (!nflag && !pd) + OUT(ps); + flush_appends(); + exit(0); + case 'r': + if (appendx >= appendnum) + appends = xrealloc(appends, + sizeof(struct s_appends) * + (appendnum *= 2)); + appends[appendx].type = AP_FILE; + appends[appendx].s = cp->t; + appends[appendx].len = strlen(cp->t); + appendx++; + break; + case 's': + sdone |= substitute(cp); + break; + case 't': + if (sdone) { + sdone = 0; + cp = cp->u.c; + goto redirect; + } + break; + case 'w': + if (pd) + break; + if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, + O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, + DEFFILEMODE)) == -1) + err(FATAL, "%s: %s", + cp->t, strerror(errno)); + if (write(cp->u.fd, ps, psl) != psl) + err(FATAL, "%s: %s", + cp->t, strerror(errno)); + break; + case 'x': + if (hs == NULL) + cspace(&HS, "\n", 1, REPLACE); + tspace = PS; + PS = HS; + HS = tspace; + break; + case 'y': + if (pd || psl == 0) + break; + for (p = ps, len = psl; --len; ++p) + *p = cp->u.y[(unsigned char)*p]; + break; + case ':': + case '}': + break; + case '=': + (void)printf("%lu\n", linenum); + } + cp = cp->next; + } /* for all cp */ + +new: if (!nflag && !pd) + OUT(ps); + flush_appends(); + } /* for all lines */ +} + +/* + * TRUE if the address passed matches the current program state + * (lastline, linenumber, ps). + */ +#define MATCH(a) \ + (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \ + (a)->type == AT_LINE ? linenum == (a)->u.l : lastline + +/* + * Return TRUE if the command applies to the current line. Sets the inrange + * flag to process ranges. Interprets the non-select (``!'') flag. + */ +static inline int +applies(struct s_command *cp) +{ + int r; + + lastaddr = 0; + if (cp->a1 == NULL && cp->a2 == NULL) + r = 1; + else if (cp->a2) + if (cp->inrange) { + if (MATCH(cp->a2)) { + cp->inrange = 0; + lastaddr = 1; + } + r = 1; + } else if (MATCH(cp->a1)) { + /* + * If the second address is a number less than or + * equal to the line number first selected, only + * one line shall be selected. + * -- POSIX 1003.2 + */ + if (cp->a2->type == AT_LINE && + linenum >= cp->a2->u.l) + lastaddr = 1; + else + cp->inrange = 1; + r = 1; + } else + r = 0; + else + r = MATCH(cp->a1); + return (cp->nonsel ? !r : r); +} + +/* + * substitute -- + * Do substitutions in the pattern space. Currently, we build a + * copy of the new pattern space in the substitute space structure + * and then swap them. + */ +static int +substitute(struct s_command *cp) +{ + SPACE tspace; + regex_t *re; + regoff_t slen; + int n, lastempty; + char *s; + + s = ps; + re = cp->u.s->re; + if (re == NULL) { + if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) { + linenum = cp->u.s->linenum; + err(COMPILE, "\\%d not defined in the RE", + cp->u.s->maxbref); + } + } + if (!regexec_e(re, s, 0, 0, psl)) + return (0); + + SS.len = 0; /* Clean substitute space. */ + slen = psl; + n = cp->u.s->n; + lastempty = 1; + + do { + /* Copy the leading retained string. */ + if (n <= 1 && match[0].rm_so) + cspace(&SS, s, match[0].rm_so, APPEND); + + /* Skip zero-length matches right after other matches. */ + if (lastempty || match[0].rm_so || + match[0].rm_so != match[0].rm_eo) { + if (n <= 1) { + /* Want this match: append replacement. */ + regsub(&SS, s, cp->u.s->new); + if (n == 1) + n = -1; + } else { + /* Want a later match: append original. */ + if (match[0].rm_eo) + cspace(&SS, s, match[0].rm_eo, APPEND); + n--; + } + } + + /* Move past this match. */ + s += match[0].rm_eo; + slen -= match[0].rm_eo; + + /* + * After a zero-length match, advance one byte, + * and at the end of the line, terminate. + */ + if (match[0].rm_so == match[0].rm_eo) { + if (*s == '\0' || *s == '\n') + slen = -1; + else + slen--; + if (*s != '\0') + cspace(&SS, s++, 1, APPEND); + lastempty = 1; + } else + lastempty = 0; + + } while (n >= 0 && slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen)); + + /* Did not find the requested number of matches. */ + if (n > 1) + return (0); + + /* Copy the trailing retained string. */ + if (slen > 0) + cspace(&SS, s, slen, APPEND); + + /* + * Swap the substitute space and the pattern space, and make sure + * that any leftover pointers into stdio memory get lost. + */ + tspace = PS; + PS = SS; + SS = tspace; + SS.space = SS.back; + + /* Handle the 'p' flag. */ + if (cp->u.s->p) + OUT(ps); + + /* Handle the 'w' flag. */ + if (cp->u.s->wfile && !pd) { + if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, + O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) + err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno)); + if (write(cp->u.s->wfd, ps, psl) != psl) + err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno)); + } + return (1); +} + +/* + * Flush append requests. Always called before reading a line, + * therefore it also resets the substitution done (sdone) flag. + */ +static void +flush_appends(void) +{ + FILE *f; + int count, i; + char buf[8 * 1024]; + + for (i = 0; i < appendx; i++) + switch (appends[i].type) { + case AP_STRING: + fwrite(appends[i].s, sizeof(char), appends[i].len, + stdout); + break; + case AP_FILE: + /* + * Read files probably shouldn't be cached. Since + * it's not an error to read a non-existent file, + * it's possible that another program is interacting + * with the sed script through the file system. It + * would be truly bizarre, but possible. It's probably + * not that big a performance win, anyhow. + */ + if ((f = fopen(appends[i].s, "r")) == NULL) + break; + while ((count = fread(buf, sizeof(char), sizeof(buf), f))) + (void)fwrite(buf, sizeof(char), count, stdout); + (void)fclose(f); + break; + } + if (ferror(stdout)) + err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); + appendx = sdone = 0; +} + +static void +lputs(char *s) +{ + int count; + char *escapes, *p; + struct winsize win; + static int termwidth = -1; + + if (termwidth == -1) { + if ((p = getenv("COLUMNS"))) + termwidth = atoi(p); + else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && + win.ws_col > 0) + termwidth = win.ws_col; + else + termwidth = 60; + } + + for (count = 0; *s; ++s) { + if (count >= termwidth) { + (void)printf("\\\n"); + count = 0; + } + if (isascii((unsigned char)*s) && isprint((unsigned char)*s) + && *s != '\\') { + (void)putchar(*s); + count++; + } else if (*s != '\n') { + escapes = "\\\a\b\f\r\t\v"; + (void)putchar('\\'); + if ((p = strchr(escapes, *s))) { + (void)putchar("\\abfrtv"[p - escapes]); + count += 2; + } else { + (void)printf("%03o", *(u_char *)s); + count += 4; + } + } + } + (void)putchar('$'); + (void)putchar('\n'); + if (ferror(stdout)) + err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); +} + +static inline int +regexec_e(regex_t *preg, char *string, int eflags, + int nomatch, size_t slen) +{ + int eval; + int tmp; + + if (preg == NULL) { + if (defpreg == NULL) + err(FATAL, "first RE may not be empty"); + } else + defpreg = preg; + + /* Set anchors, discounting trailing newline (if any). */ + if (slen > 0 && string[slen - 1] == '\n') + slen--; + + /* workaround for systems that don't support REG_STARTEND */ + tmp = string[slen]; + string[slen] = '\0'; + + eval = regexec(defpreg, string, + nomatch ? 0 : maxnsub + 1, match, eflags); + + string[slen] = tmp; /* restore byte */ + + switch (eval) { + case 0: + return (1); + case REG_NOMATCH: + return (0); + } + err(FATAL, "RE error: %s", strregerror(eval, defpreg)); + /* NOTREACHED */ + return 0; +} + +/* + * regsub - perform substitutions after a regexp match + * Based on a routine by Henry Spencer + */ +static void +regsub(SPACE *sp, char *string, char *src) +{ + int len, no; + char c, *dst; + +#define NEEDSP(reqlen) \ + if (sp->len + (reqlen) + 1 >= sp->blen) { \ + size_t newlen = sp->blen + (reqlen) + 1024; \ + sp->space = sp->back = xrealloc(sp->back, newlen); \ + sp->blen = newlen; \ + dst = sp->space + sp->len; \ + } + + dst = sp->space + sp->len; + while ((c = *src++) != '\0') { + if (c == '&') + no = 0; + else if (c == '\\' && isdigit((unsigned char)*src)) + no = *src++ - '0'; + else + no = -1; + if (no < 0) { /* Ordinary character. */ + if (c == '\\' && (*src == '\\' || *src == '&')) + c = *src++; + NEEDSP(1); + *dst++ = c; + ++sp->len; + } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { + len = match[no].rm_eo - match[no].rm_so; + NEEDSP(len); + memmove(dst, string + match[no].rm_so, len); + dst += len; + sp->len += len; + } + } + NEEDSP(1); + *dst = '\0'; +} + +/* + * aspace -- + * Append the source space to the destination space, allocating new + * space as necessary. + */ +void +cspace(SPACE *sp, char *p, size_t len, enum e_spflag spflag) +{ + size_t tlen; + + /* Make sure SPACE has enough memory and ramp up quickly. */ + tlen = sp->len + len + 1; + if (tlen > sp->blen) { + size_t newlen = tlen + 1024; + sp->space = sp->back = xrealloc(sp->back, newlen); + sp->blen = newlen; + } + + if (spflag == REPLACE) + sp->len = 0; + + memmove(sp->space + sp->len, p, len); + + sp->space[sp->len += len] = '\0'; +} + +/* + * Close all cached opened files and report any errors + */ +void +cfclose(struct s_command *cp, struct s_command *end) +{ + + for (; cp != end; cp = cp->next) + switch (cp->code) { + case 's': + if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) + err(FATAL, + "%s: %s", cp->u.s->wfile, strerror(errno)); + cp->u.s->wfd = -1; + break; + case 'w': + if (cp->u.fd != -1 && close(cp->u.fd)) + err(FATAL, "%s: %s", cp->t, strerror(errno)); + cp->u.fd = -1; + break; + case '{': + cfclose(cp->u.c, cp->next); + break; + } +} diff --git a/sed/reallocarray.c b/sed/reallocarray.c @@ -0,0 +1,38 @@ +/* $OpenBSD: reallocarray.c,v 1.1 2014/05/08 21:43:49 deraadt Exp $ */ +/* + * Copyright (c) 2008 Otto Moerbeek <otto@drijf.net> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> + +/* + * This is sqrt(SIZE_MAX+1), as s1*s2 <= SIZE_MAX + * if both s1 < MUL_NO_OVERFLOW and s2 < MUL_NO_OVERFLOW + */ +#define MUL_NO_OVERFLOW (1UL << (sizeof(size_t) * 4)) + +void * +reallocarray(void *optr, size_t nmemb, size_t size) +{ + if ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && + nmemb > 0 && SIZE_MAX / nmemb < size) { + errno = ENOMEM; + return NULL; + } + return realloc(optr, size * nmemb); +} diff --git a/sed/sed.1 b/sed/sed.1 @@ -0,0 +1,566 @@ +.\" $OpenBSD: sed.1,v 1.44 2014/10/22 23:23:22 schwarze Exp $ +.\" +.\" Copyright (c) 1992, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" from: @(#)sed.1 8.2 (Berkeley) 12/30/93 +.\" +.Dd $Mdocdate: October 22 2014 $ +.Dt SED 1 +.Os +.Sh NAME +.Nm sed +.Nd stream editor +.Sh SYNOPSIS +.Nm sed +.Op Fl aEnru +.Ar command +.Op Ar +.Nm sed +.Op Fl aEnru +.Op Fl e Ar command +.Op Fl f Ar command_file +.Op Ar +.Sh DESCRIPTION +The +.Nm +utility reads the specified files, or the standard input if no files +are specified, modifying the input as specified by a list of commands. +The input is then written to the standard output. +.Pp +A single command may be specified as the first argument to +.Nm sed . +Multiple commands may be specified +separated by newlines or semicolons, +or by using the +.Fl e +or +.Fl f +options. +All commands are applied to the input in the order they are specified +regardless of their origin. +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl a +The files listed as parameters for the +.Ic w +function or flag are created (or truncated) before any processing begins, +by default. +The +.Fl a +option causes +.Nm +to delay opening each file until a command containing the related +.Ic w +function or flag is applied to a line of input. +.It Fl E +Interpret regular expressions using POSIX extended regular expression syntax. +The default behaviour is to use POSIX basic regular expression syntax. +.It Fl e Ar command +Append the editing commands specified by the +.Ar command +argument +to the list of commands. +.It Fl f Ar command_file +Append the editing commands found in the file +.Ar command_file +to the list of commands. +The editing commands should each be listed on a separate line. +.It Fl r +An alias for +.Fl E , +for compatibility with GNU sed. +.It Fl n +By default, each line of input is echoed to the standard output after +all of the commands have been applied to it. +The +.Fl n +option suppresses this behavior. +.It Fl u +Force output to be line buffered, +printing each line as it becomes available. +By default, output is line buffered when standard output is a terminal +and block buffered otherwise. +See +.Xr setbuf 3 +for a more detailed explanation. +.El +.Pp +The form of a +.Nm +command is as follows: +.Pp +.Dl [address[,address]]function[arguments] +.Pp +Whitespace may be inserted before the first address and the function +portions of the command. +.Pp +Normally, +.Nm +cyclically copies a line of input, not including its terminating newline +character, into a +.Em pattern space , +(unless there is something left after a +.Ic D +function), +applies all of the commands with addresses that select that pattern space, +copies the pattern space to the standard output, appending a newline, and +deletes the pattern space. +.Pp +Some of the functions use a +.Em hold space +to save all or part of the pattern space for subsequent retrieval. +.Sh SED ADDRESSES +An address is not required, but if specified must be a number (that counts +input lines +cumulatively across input files), a dollar character +.Pq Ql $ +that addresses the last line of input, or a context address +(which consists of a regular expression preceded and followed by a +delimiter). +.Pp +A command line with no addresses selects every pattern space. +.Pp +A command line with one address selects all of the pattern spaces +that match the address. +.Pp +A command line with two addresses selects the inclusive range from +the first pattern space that matches the first address through the next +pattern space that matches the second. +(If the second address is a number less than or equal to the line number +first selected, only that line is selected.) +Starting at the first line following the selected range, +.Nm +starts looking again for the first address. +.Pp +Editing commands can be applied to non-selected pattern spaces by use +of the exclamation character +.Pq Ql \&! +function. +.Sh SED REGULAR EXPRESSIONS +By default, +.Nm +regular expressions are basic regular expressions +.Pq BREs . +Extended regular expressions are supported using the +.Fl E +and +.Fl r +options. +See +.Xr re_format 7 +for more information on regular expressions. +In addition, +.Nm +has the following two additions to BREs: +.Pp +.Bl -enum -compact +.It +In a context address, any character other than a backslash +.Pq Ql \e +or newline character may be used to delimit the regular expression. +The opening delimiter should be preceded by a backslash +unless it is a slash. +Putting a backslash character before the delimiting character +causes the character to be treated literally. +For example, in the context address \exabc\exdefx, the RE delimiter +is an +.Sq x +and the second +.Sq x +stands for itself, so that the regular expression is +.Dq abcxdef . +.Pp +.It +The escape sequence \en matches a newline character embedded in the +pattern space. +You can't, however, use a literal newline character in an address or +in the substitute command. +.El +.Pp +One special feature of +.Nm +regular expressions is that they can default to the last regular +expression used. +If a regular expression is empty, i.e., just the delimiter characters +are specified, the last regular expression encountered is used instead. +The last regular expression is defined as the last regular expression +used as part of an address or substitute command, and at run-time, not +compile-time. +For example, the command +.Dq /abc/s//XXX/ +will substitute +.Dq XXX +for the pattern +.Dq abc . +.Sh SED FUNCTIONS +In the following list of commands, the maximum number of permissible +addresses for each command is indicated by [0addr], [1addr], or [2addr], +representing zero, one, or two addresses. +.Pp +The argument +.Ar text +consists of one or more lines. +To embed a newline in the text, precede it with a backslash. +Other backslashes in text are deleted and the following character +taken literally. +.Pp +The +.Ic r +and +.Ic w +functions, +as well as the +.Cm w +flag to the +.Ic s +function, +take an optional +.Ar file +parameter, +which should be separated from the function or flag by whitespace. +Files are created +(or their contents truncated) +before any input processing begins. +.Pp +The +.Ic b , +.Ic r , +.Ic s , +.Ic t , +.Ic w , +.Ic y , +and +.Ic \&: +functions all accept additional arguments. +The synopses below indicate which arguments have to be separated from +the function letters by whitespace characters. +.Pp +Functions can be combined to form a +.Em function list , +a list of +.Nm +functions each followed by a newline, as follows: +.Bd -literal -offset indent +{ function + function + ... + function +} +.Ed +.Pp +The braces can be preceded and followed by whitespace. +The functions can be preceded by whitespace as well. +.Pp +Functions and function lists may be preceded by an exclamation mark, +in which case they are applied only to lines that are +.Em not +selected by the addresses. +.Bl -tag -width Ds +.It [2addr] Ar function-list +Execute +.Ar function-list +only when the pattern space is selected. +.It Xo [1 addr] Ic a Ns \e +.br +.Ar text +.Xc +.Pp +Write +.Ar text +to standard output immediately before each attempt to read a line of input, +whether by executing the +.Ic N +function or by beginning a new cycle. +.It [2addr] Ns Ic b Bq Ar label +Branch to the +.Ic \&: +function with the specified +.Ar label . +If the label is not specified, branch to the end of the script. +.It Xo [2addr] Ic c Ns \e +.br +.Ar text +.Xc +.Pp +Delete the pattern space. +With 0 or 1 address or at the end of a 2-address range, +.Ar text +is written to the standard output. +.It [2addr] Ns Ic d +Delete the pattern space and start the next cycle. +.It [2addr] Ns Ic D +Delete the initial segment of the pattern space through the first +newline character and start the next cycle. +.It [2addr] Ns Ic g +Replace the contents of the pattern space with the contents of the +hold space. +.It [2addr] Ns Ic G +Append a newline character followed by the contents of the hold space +to the pattern space. +.It [2addr] Ns Ic h +Replace the contents of the hold space with the contents of the +pattern space. +.It [2addr] Ns Ic H +Append a newline character followed by the contents of the pattern space +to the hold space. +.It Xo [1addr] Ic i Ns \e +.br +.Ar text +.Xc +.Pp +Write +.Ar text +to the standard output. +.It [2addr] Ns Ic l +(The letter ell.) +Write the pattern space to the standard output in a visually unambiguous +form. +This form is as follows: +.Pp +.Bl -tag -width "carriage-returnXX" -offset indent -compact +.It backslash +\e\e +.It alert +\ea +.It backspace +\eb +.It form-feed +\ef +.It carriage-return +\er +.It tab +\et +.It vertical tab +\ev +.El +.Pp +Non-printable characters are written as three-digit octal numbers (with a +preceding backslash) for each byte in the character (most significant byte +first). +Long lines are folded, with the point of folding indicated by displaying +a backslash followed by a newline. +The end of each line is marked with a +.Ql $ . +.It [2addr] Ns Ic n +Write the pattern space to the standard output if the default output has +not been suppressed, and replace the pattern space with the next line of +input. +.It [2addr] Ns Ic N +Append the next line of input to the pattern space, using an embedded +newline character to separate the appended material from the original +contents. +Note that the current line number changes. +.It [2addr] Ns Ic p +Write the pattern space to standard output. +.It [2addr] Ns Ic P +Write the pattern space, up to the first newline character, +to the standard output. +.It [1addr] Ns Ic q +Branch to the end of the script and quit without starting a new cycle. +.It [1addr] Ns Ic r Ar file +Copy the contents of +.Ar file +to the standard output immediately before the next attempt to read a +line of input. +If +.Ar file +cannot be read for any reason, it is silently ignored and no error +condition is set. +.It [2addr] Ns Ic s Ns / Ns Ar RE Ns / Ns Ar replacement Ns / Ns Ar flags +Substitute the +.Ar replacement +string for the first instance of the regular expression +.Ar RE +in the pattern space. +Any character other than backslash or newline can be used instead of +a slash to delimit the regular expression and the replacement. +Within the regular expression and the replacement, +the regular expression delimiter itself can be used as +a literal character if it is preceded by a backslash. +.Pp +An ampersand +.Pq Ql & +appearing in the replacement is replaced by the string matching the +regular expression. +The special meaning of +.Ql & +in this context can be suppressed by preceding it by a backslash. +The string +.Ql \e# , +where +.Ql # +is a digit, is replaced by the text matched +by the corresponding backreference expression (see +.Xr re_format 7 ) . +.Pp +A line can be split by substituting a newline character into it. +To specify a newline character in the replacement string, precede it with +a backslash. +.Pp +The value of +.Ar flags +in the substitute function is zero or more of the following: +.Bl -tag -width "XXXXXX" -offset indent +.It Cm 0 No ... Cm 9 +Make the substitution only for the N'th occurrence of the regular +expression in the pattern space. +.It Cm g +Make the substitution for all non-overlapping matches of the +regular expression, not just the first one. +.It Cm p +Write the pattern space to standard output if a replacement was made. +If the replacement string is identical to that which it replaces, it +is still considered to have been a replacement. +.It Cm w Ar file +Append the pattern space to +.Ar file +if a replacement was made. +If the replacement string is identical to that which it replaces, it +is still considered to have been a replacement. +.El +.It [2addr] Ns Ic t Bq Ar label +Branch to the +.Ic \&: +function bearing the +.Ar label +if any substitutions have been made since the +most recent reading of an input line or execution of a +.Ic t +function. +If no label is specified, branch to the end of the script. +.It [2addr] Ns Ic w Ar file +Append the pattern space to the +.Ar file . +.It [2addr] Ns Ic x +Swap the contents of the pattern and hold spaces. +.It [2addr] Ns Ic y Ns / Ns Ar string1 Ns / Ns Ar string2 Ns / +Replace all occurrences of characters in +.Ar string1 +in the pattern space with the corresponding characters from +.Ar string2 . +Any character other than a backslash or newline can be used instead of +a slash to delimit the strings. +Within +.Ar string1 +and +.Ar string2 , +a backslash followed by any character other than a newline is that literal +character, and a backslash followed by an +.Sq n +is replaced by a newline character. +.It [0addr] Ns Ic \&: Ns Ar label +This function does nothing; it bears a +.Ar label +to which the +.Ic b +and +.Ic t +commands may branch. +.It [1addr] Ns Ic = +Write the line number to the standard output followed by a newline character. +.It [0addr] +Empty lines are ignored. +.It [0addr] Ns Ic # +The +.Ql # +and the remainder of the line are ignored (treated as a comment), with +the single exception that if the first two characters in the file are +.Ql #n , +the default output is suppressed. +This is the same as specifying the +.Fl n +option on the command line. +.El +.Sh EXIT STATUS +.Ex -std sed +.Sh EXAMPLES +The following simulates the +.Xr cat 1 +.Fl s +command, +squeezing excess empty lines from standard input: +.Bd -literal -offset indent +$ sed -n ' +# Write non-empty lines. +/./ { + p + d + } +# Write a single empty line, then look for more empty lines. +/^$/ p +# Get the next line, discard the held <newline> (empty line), +# and look for more empty lines. +:Empty +/^$/ { + N + s/.// + b Empty + } +# Write the non-empty line before going back to search +# for the first in a set of empty lines. + p +\&' +.Ed +.Sh SEE ALSO +.Xr awk 1 , +.Xr ed 1 , +.Xr grep 1 , +.Xr re_format 7 +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification. +.Pp +The flags +.Op Fl aEru +are extensions to that specification. +.Pp +The use of newlines to separate multiple commands on the command line +is non-portable; +the use of newlines to separate multiple commands within a command file +.Pq Fl f Ar command_file +is portable. +.Sh HISTORY +A +.Nm +command appeared in +.At v7 . +.Sh CAVEATS +The use of semicolons to separate multiple commands +is not permitted for the following commands: +.Ic a , b , c , +.Ic i , r , t , +.Ic w , \&: , +and +.Ic # . diff --git a/sed/strlcat.c b/sed/strlcat.c @@ -0,0 +1,52 @@ +/* + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <string.h> +#include <sys/types.h> + +#include "../util.h" + +/* + * Appends src to string dst of size siz (unlike strncat, siz is the + * full size of dst, not space left). At most siz-1 characters + * will be copied. Always NUL terminates (unless siz <= strlen(dst)). + * Returns strlen(src) + MIN(siz, strlen(initial dst)). + * If retval >= siz, truncation occurred. + */ +size_t +strlcat(char *dst, const char *src, size_t siz) +{ + char *d = dst; + const char *s = src; + size_t n = siz; + size_t dlen; + /* Find the end of dst and adjust bytes left but don't go past end */ + while (n-- != 0 && *d != '\0') + d++; + dlen = d - dst; + n = siz - dlen; + if (n == 0) + return(dlen + strlen(s)); + while (*s != '\0') { + if (n != 1) { + *d++ = *s; + n--; + } + s++; + } + *d = '\0'; + return(dlen + (s - src)); /* count does not include NUL */ +} diff --git a/sed/strlcpy.c b/sed/strlcpy.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <string.h> +#include <sys/types.h> + +#include "../util.h" + +/* + * Copy src to string dst of size siz. At most siz-1 characters + * will be copied. Always NUL terminates (unless siz == 0). + * Returns strlen(src); if retval >= siz, truncation occurred. + */ +size_t +strlcpy(char *dst, const char *src, size_t siz) +{ + char *d = dst; + const char *s = src; + size_t n = siz; + /* Copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') + break; + } + } + /* Not enough room in dst, add NUL and traverse rest of src */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) + ; + } + return(s - src - 1); /* count does not include NUL */ +} diff --git a/sed/util.h b/sed/util.h @@ -0,0 +1,8 @@ +#include <sys/stat.h> + +#define DEFFILEMODE (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH) + +char *fgetln(FILE *, size_t *); +void *reallocarray(void *, size_t, size_t); +size_t strlcat(char *, const char *, size_t); +size_t strlcpy(char *, const char *, size_t);