regexp9.3 (3606B)
1 .deEX 2 .ift .ft5 3 .nf 4 .. 5 .deEE 6 .ft1 7 .fi 8 .. 9 .TH REGEXP9 3 10 .SH NAME 11 regcomp, regcomplit, regcompnl, regexec, regsub, rregexec, rregsub, regerror \- regular expression 12 .SH SYNOPSIS 13 .B #include <utf.h> 14 .br 15 .B #include <fmt.h> 16 .br 17 .B #include <regexp9.h> 18 .PP 19 .ta \w'\fLReprog 'u 20 .B 21 Reprog *regcomp(char *exp) 22 .PP 23 .B 24 Reprog *regcomplit(char *exp) 25 .PP 26 .B 27 Reprog *regcompnl(char *exp) 28 .PP 29 .nf 30 .B 31 int regexec(Reprog *prog, char *string, Resub *match, int msize) 32 .PP 33 .nf 34 .B 35 void regsub(char *source, char *dest, int dlen, Resub *match, int msize) 36 .PP 37 .nf 38 .B 39 int rregexec(Reprog *prog, Rune *string, Resub *match, int msize) 40 .PP 41 .nf 42 .B 43 void rregsub(Rune *source, Rune *dest, int dlen, Resub *match, int msize) 44 .PP 45 .B 46 void regerror(char *msg) 47 .SH DESCRIPTION 48 .I Regcomp 49 compiles a 50 regular expression and returns 51 a pointer to the generated description. 52 The space is allocated by 53 .IR malloc (3) 54 and may be released by 55 .IR free . 56 Regular expressions are exactly as in 57 .IR regexp9 (7). 58 .PP 59 .I Regcomplit 60 is like 61 .I regcomp 62 except that all characters are treated literally. 63 .I Regcompnl 64 is like 65 .I regcomp 66 except that the 67 .B . 68 metacharacter matches all characters, including newlines. 69 .PP 70 .I Regexec 71 matches a null-terminated 72 .I string 73 against the compiled regular expression in 74 .IR prog . 75 If it matches, 76 .I regexec 77 returns 78 .B 1 79 and fills in the array 80 .I match 81 with character pointers to the substrings of 82 .I string 83 that correspond to the 84 parenthesized subexpressions of 85 .IR exp : 86 .BI match[ i ].sp 87 points to the beginning and 88 .BI match[ i ].ep 89 points just beyond 90 the end of the 91 .IR i th 92 substring. 93 (Subexpression 94 .I i 95 begins at the 96 .IR i th 97 left parenthesis, counting from 1.) 
98 Pointers in 99 .B match[0] 100 pick out the substring that corresponds to 101 the whole regular expression. 102 Unused elements of 103 .I match 104 are filled with zeros. 105 Matches involving 106 .LR * , 107 .LR + , 108 and 109 .L ? 110 are extended as far as possible. 111 The number of array elements in 112 .I match 113 is given by 114 .IR msize . 115 The structure of elements of 116 .I match 117 is: 118 .IP 119 .EX 120 typedef struct { 121 union { 122 char *sp; 123 Rune *rsp; 124 }; 125 union { 126 char *ep; 127 Rune *rep; 128 }; 129 } Resub; 130 .EE 131 .LP 132 If 133 .B match[0].sp 134 is nonzero on entry, 135 .I regexec 136 starts matching at that point within 137 .IR string . 138 If 139 .B match[0].ep 140 is nonzero on entry, 141 the last character matched is the one 142 preceding that point. 143 .PP 144 .I Regsub 145 places in 146 .I dest 147 a substitution instance of 148 .I source 149 in the context of the last 150 .I regexec 151 performed using 152 .IR match . 153 Each instance of 154 .BI \e n\f1, 155 where 156 .I n 157 is a digit, is replaced by the 158 string delimited by 159 .BI match[ n ].sp 160 and 161 .BI match[ n ].ep\f1. 162 Each instance of 163 .L & 164 is replaced by the string delimited by 165 .B match[0].sp 166 and 167 .BR match[0].ep . 168 The substitution will always be null-terminated and 169 trimmed to fit into dlen bytes. 170 .PP 171 .IR Regerror , 172 called whenever an error is detected in 173 .IR regcomp , 174 writes the string 175 .I msg 176 on the standard error file and exits. 177 .I Regerror 178 can be replaced to perform 179 special error processing. 180 If the user-supplied 181 .I regerror 182 returns rather than exits, 183 .I regcomp 184 will return 0. 185 .PP 186 .I Rregexec 187 and 188 .I rregsub 189 are variants of 190 .I regexec 191 and 192 .I regsub 193 that use strings of 194 .B Runes 195 instead of strings of 196 .BR chars . 
197 With these routines, the 198 .I rsp 199 and 200 .I rep 201 fields of the 202 .I match 203 array elements should be used. 204 .SH SOURCE 205 .B http://swtch.com/plan9port/unix 206 .SH "SEE ALSO" 207 .IR grep (1) 208 .SH DIAGNOSTICS 209 .I Regcomp 210 returns 211 .B 0 212 for an illegal expression 213 or other failure. 214 .I Regexec 215 returns 0 216 if 217 .I string 218 is not matched. 219 .SH BUGS 220 There is no way to specify or match a NUL character; NULs terminate patterns and strings.