regexec.c (5029B)
1 #include "lib9.h" 2 #include "regexp9.h" 3 #include "regcomp.h" 4 5 6 /* 7 * return 0 if no match 8 * >0 if a match 9 * <0 if we ran out of _relist space 10 */ 11 static int 12 regexec1(Reprog *progp, /* program to run */ 13 char *bol, /* string to run machine on */ 14 Resub *mp, /* subexpression elements */ 15 int ms, /* number of elements at mp */ 16 Reljunk *j 17 ) 18 { 19 int flag=0; 20 Reinst *inst; 21 Relist *tlp; 22 char *s; 23 int i, checkstart; 24 Rune r, *rp, *ep; 25 int n; 26 Relist* tl; /* This list, next list */ 27 Relist* nl; 28 Relist* tle; /* ends of this and next list */ 29 Relist* nle; 30 int match; 31 char *p; 32 33 match = 0; 34 checkstart = j->starttype; 35 if(mp) 36 for(i=0; i<ms; i++) { 37 mp[i].s.sp = 0; 38 mp[i].e.ep = 0; 39 } 40 j->relist[0][0].inst = 0; 41 j->relist[1][0].inst = 0; 42 43 /* Execute machine once for each character, including terminal NUL */ 44 s = j->starts; 45 do{ 46 /* fast check for first char */ 47 if(checkstart) { 48 switch(j->starttype) { 49 case RUNE: 50 p = utfrune(s, j->startchar); 51 if(p == 0 || s == j->eol) 52 return match; 53 s = p; 54 break; 55 case BOL: 56 if(s == bol) 57 break; 58 p = utfrune(s, '\n'); 59 if(p == 0 || s == j->eol) 60 return match; 61 s = p+1; 62 break; 63 } 64 } 65 r = *(uchar*)s; 66 if(r < Runeself) 67 n = 1; 68 else 69 n = chartorune(&r, s); 70 71 /* switch run lists */ 72 tl = j->relist[flag]; 73 tle = j->reliste[flag]; 74 nl = j->relist[flag^=1]; 75 nle = j->reliste[flag]; 76 nl->inst = 0; 77 78 /* Add first instruction to current list */ 79 if(match == 0) 80 _renewemptythread(tl, progp->startinst, ms, s); 81 82 /* Execute machine until current list is empty */ 83 for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ 84 for(inst = tlp->inst; ; inst = inst->u2.next){ 85 switch(inst->type){ 86 case RUNE: /* regular character */ 87 if(inst->u1.r == r){ 88 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 89 return -1; 90 } 91 break; 92 case LBRA: 93 tlp->se.m[inst->u1.subid].s.sp = s; 94 continue; 95 case RBRA: 96 tlp->se.m[inst->u1.subid].e.ep = s; 97 continue; 98 case ANY: 99 if(r != '\n') 100 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 101 return -1; 102 break; 103 case ANYNL: 104 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 105 return -1; 106 break; 107 case BOL: 108 if(s == bol || *(s-1) == '\n') 109 continue; 110 break; 111 case EOL: 112 if(s == j->eol || r == 0 || r == '\n') 113 continue; 114 break; 115 case CCLASS: 116 ep = inst->u1.cp->end; 117 for(rp = inst->u1.cp->spans; rp < ep; rp += 2) 118 if(r >= rp[0] && r <= rp[1]){ 119 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 120 return -1; 121 break; 122 } 123 break; 124 case NCCLASS: 125 ep = inst->u1.cp->end; 126 for(rp = inst->u1.cp->spans; rp < ep; rp += 2) 127 if(r >= rp[0] && r <= rp[1]) 128 break; 129 if(rp == ep) 130 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 131 return -1; 132 break; 133 case OR: 134 /* evaluate right choice later */ 135 if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle) 136 return -1; 137 /* efficiency: advance and re-evaluate */ 138 continue; 139 case END: /* Match! */ 140 match = 1; 141 tlp->se.m[0].e.ep = s; 142 if(mp != 0) 143 _renewmatch(mp, ms, &tlp->se); 144 break; 145 } 146 break; 147 } 148 } 149 if(s == j->eol) 150 break; 151 checkstart = j->starttype && nl->inst==0; 152 s += n; 153 }while(r); 154 return match; 155 } 156 157 static int 158 regexec2(Reprog *progp, /* program to run */ 159 char *bol, /* string to run machine on */ 160 Resub *mp, /* subexpression elements */ 161 int ms, /* number of elements at mp */ 162 Reljunk *j 163 ) 164 { 165 int rv; 166 Relist *relist0, *relist1; 167 168 /* mark space */ 169 relist0 = malloc(BIGLISTSIZE*sizeof(Relist)); 170 if(relist0 == nil) 171 return -1; 172 relist1 = malloc(BIGLISTSIZE*sizeof(Relist)); 173 if(relist1 == nil){ 174 free(relist1); 175 return -1; 176 } 177 j->relist[0] = relist0; 178 j->relist[1] = relist1; 179 j->reliste[0] = relist0 + BIGLISTSIZE - 2; 180 j->reliste[1] = relist1 + BIGLISTSIZE - 2; 181 182 rv = regexec1(progp, bol, mp, ms, j); 183 free(relist0); 184 free(relist1); 185 return rv; 186 } 187 188 extern int 189 regexec(Reprog *progp, /* program to run */ 190 char *bol, /* string to run machine on */ 191 Resub *mp, /* subexpression elements */ 192 int ms) /* number of elements at mp */ 193 { 194 Reljunk j; 195 Relist relist0[LISTSIZE], relist1[LISTSIZE]; 196 int rv; 197 198 /* 199 * use user-specified starting/ending location if specified 200 */ 201 j.starts = bol; 202 j.eol = 0; 203 if(mp && ms>0){ 204 if(mp->s.sp) 205 j.starts = mp->s.sp; 206 if(mp->e.ep) 207 j.eol = mp->e.ep; 208 } 209 j.starttype = 0; 210 j.startchar = 0; 211 if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) { 212 j.starttype = RUNE; 213 j.startchar = progp->startinst->u1.r; 214 } 215 if(progp->startinst->type == BOL) 216 j.starttype = BOL; 217 218 /* mark space */ 219 j.relist[0] = relist0; 220 j.relist[1] = relist1; 221 j.reliste[0] = relist0 + nelem(relist0) - 2; 222 j.reliste[1] = relist1 + nelem(relist1) - 2; 223 224 rv = regexec1(progp, bol, mp, ms, &j); 225 if(rv >= 0) 226 return rv; 227 rv = regexec2(progp, bol, mp, ms, &j); 228 if(rv >= 0) 229 return rv; 230 return -1; 231 }