hbase

heirloom base
git clone git://git.2f30.org/hbase
Log | Files | Refs | README

regexp9.3 (3606B)


      1 .de EX
      2 .if t .ft 5
      3 .nf
      4 ..
      5 .de EE
      6 .ft 1
      7 .fi
      8 ..
      9 .TH REGEXP9 3
     10 .SH NAME
     11 regcomp, regcomplit, regcompnl, regexec, regsub, rregexec, rregsub, regerror \- regular expression
     12 .SH SYNOPSIS
     13 .B #include <utf.h>
     14 .br
     15 .B #include <fmt.h>
     16 .br
     17 .B #include <regexp9.h>
     18 .PP
     19 .ta \w'\fLRegprog 'u
     20 .B
     21 Reprog	*regcomp(char *exp)
     22 .PP
     23 .B
     24 Reprog	*regcomplit(char *exp)
     25 .PP
     26 .B
     27 Reprog	*regcompnl(char *exp)
     28 .PP
     29 .nf
     30 .B
     31 int  regexec(Reprog *prog, char *string, Resub *match, int msize)
     32 .PP
     33 .nf
     34 .B
     35 void regsub(char *source, char *dest, int dlen, Resub *match, int msize)
     36 .PP
     37 .nf
     38 .B
     39 int  rregexec(Reprog *prog, Rune *string, Resub *match, int msize)
     40 .PP
     41 .nf
     42 .B
     43 void rregsub(Rune *source, Rune *dest, int dlen, Resub *match, int msize)
     44 .PP
     45 .B
     46 void regerror(char *msg)
     47 .SH DESCRIPTION
     48 .I Regcomp
     49 compiles a
     50 regular expression and returns
     51 a pointer to the generated description.
     52 The space is allocated by
     53 .IR malloc (3)
     54 and may be released by
     55 .IR free .
     56 Regular expressions are exactly as in
     57 .IR regexp9 (7).
     58 .PP
     59 .I Regcomplit
     60 is like
     61 .I regcomp
     62 except that all characters are treated literally.
     63 .I Regcompnl
     64 is like
     65 .I regcomp
     66 except that the
     67 .B .
     68 metacharacter matches all characters, including newlines.
     69 .PP
     70 .I Regexec
     71 matches a null-terminated
     72 .I string
     73 against the compiled regular expression in
     74 .IR prog .
     75 If it matches,
     76 .I regexec
     77 returns
     78 .B 1
     79 and fills in the array
     80 .I match
     81 with character pointers to the substrings of
     82 .I string
     83 that correspond to the
     84 parenthesized subexpressions of 
     85 .IR exp :
     86 .BI match[ i ].sp
     87 points to the beginning and
     88 .BI match[ i ].ep
     89 points just beyond
     90 the end of the
     91 .IR i th
     92 substring.
     93 (Subexpression
     94 .I i
     95 begins at the
     96 .IR i th
     97 left parenthesis, counting from 1.)
     98 Pointers in
     99 .B match[0]
    100 pick out the substring that corresponds to
    101 the whole regular expression.
    102 Unused elements of
    103 .I match
    104 are filled with zeros.
    105 Matches involving
    106 .LR * ,
    107 .LR + ,
    108 and 
    109 .L ?
    110 are extended as far as possible.
    111 The number of array elements in 
    112 .I match
    113 is given by
    114 .IR msize .
    115 The structure of elements of
    116 .I match 
    117 is:
    118 .IP
    119 .EX
    120 typedef struct {
    121 	union {
    122 	   char *sp;
    123 	   Rune *rsp;
    124 	};
    125 	union {
    126 	   char *ep;
    127 	   Rune *rep;
    128 	};
    129 } Resub;
    130 .EE
    131 .LP
    132 If
    133 .B match[0].sp
    134 is nonzero on entry,
    135 .I regexec
    136 starts matching at that point within
    137 .IR string .
    138 If
    139 .B match[0].ep
    140 is nonzero on entry,
    141 the last character matched is the one
    142 preceding that point.
    143 .PP
    144 .I Regsub
    145 places in
    146 .I dest
    147 a substitution instance of
    148 .I source
    149 in the context of the last
    150 .I regexec
    151 performed using
    152 .IR match .
    153 Each instance of
    154 .BI \e n\f1,
    155 where
    156 .I n
    157 is a digit, is replaced by the
    158 string delimited by
    159 .BI match[ n ].sp
    160 and
    161 .BI match[ n ].ep\f1.
    162 Each instance of 
    163 .L &
    164 is replaced by the string delimited by
    165 .B match[0].sp
    166 and
    167 .BR match[0].ep .
    168 The substitution will always be null-terminated and
    169 trimmed to fit into \fIdlen\fP bytes.
    170 .PP
    171 .IR Regerror ,
    172 called whenever an error is detected in
    173 .IR regcomp ,
    174 writes the string
    175 .I msg
    176 on the standard error file and exits.
    177 .I Regerror
    178 can be replaced to perform
    179 special error processing.
    180 If the user-supplied
    181 .I regerror
    182 returns rather than exits,
    183 .I regcomp
    184 will return 0. 
    185 .PP
    186 .I Rregexec
    187 and
    188 .I rregsub
    189 are variants of 
    190 .I regexec
    191 and
    192 .I regsub
    193 that use strings of
    194 .B Runes
    195 instead of strings of
    196 .BR chars .
    197 With these routines, the 
    198 .I rsp
    199 and
    200 .I rep
    201 fields of the
    202 .I match
    203 array elements should be used.
    204 .SH SOURCE
    205 .B http://swtch.com/plan9port/unix
    206 .SH "SEE ALSO"
    207 .IR grep (1)
    208 .SH DIAGNOSTICS
    209 .I Regcomp
    210 returns 
    211 .B 0
    212 for an illegal expression
    213 or other failure.
    214 .I Regexec
    215 returns \fB0\fP
    216 if
    217 .I string
    218 is not matched.
    219 .SH BUGS
    220 There is no way to specify or match a NUL character; NULs terminate patterns and strings.