rregexec.c (4729B)
1 #include "lib9.h" 2 #include "regexp9.h" 3 #include "regcomp.h" 4 5 /* 6 * return 0 if no match 7 * >0 if a match 8 * <0 if we ran out of _relist space 9 */ 10 static int 11 rregexec1(Reprog *progp, /* program to run */ 12 Rune *bol, /* string to run machine on */ 13 Resub *mp, /* subexpression elements */ 14 int ms, /* number of elements at mp */ 15 Reljunk *j) 16 { 17 int flag=0; 18 Reinst *inst; 19 Relist *tlp; 20 Rune *s; 21 int i, checkstart; 22 Rune r, *rp, *ep; 23 Relist* tl; /* This list, next list */ 24 Relist* nl; 25 Relist* tle; /* ends of this and next list */ 26 Relist* nle; 27 int match; 28 Rune *p; 29 30 match = 0; 31 checkstart = j->startchar; 32 if(mp) 33 for(i=0; i<ms; i++) { 34 mp[i].s.rsp = 0; 35 mp[i].e.rep = 0; 36 } 37 j->relist[0][0].inst = 0; 38 j->relist[1][0].inst = 0; 39 40 /* Execute machine once for each character, including terminal NUL */ 41 s = j->rstarts; 42 do{ 43 44 /* fast check for first char */ 45 if(checkstart) { 46 switch(j->starttype) { 47 case RUNE: 48 p = runestrchr(s, j->startchar); 49 if(p == 0 || p == j->reol) 50 return match; 51 s = p; 52 break; 53 case BOL: 54 if(s == bol) 55 break; 56 p = runestrchr(s, '\n'); 57 if(p == 0 || s == j->reol) 58 return match; 59 s = p+1; 60 break; 61 } 62 } 63 64 r = *s; 65 66 /* switch run lists */ 67 tl = j->relist[flag]; 68 tle = j->reliste[flag]; 69 nl = j->relist[flag^=1]; 70 nle = j->reliste[flag]; 71 nl->inst = 0; 72 73 /* Add first instruction to current list */ 74 _rrenewemptythread(tl, progp->startinst, ms, s); 75 76 /* Execute machine until current list is empty */ 77 for(tlp=tl; tlp->inst; tlp++){ 78 for(inst=tlp->inst; ; inst = inst->u2.next){ 79 switch(inst->type){ 80 case RUNE: /* regular character */ 81 if(inst->u1.r == r) 82 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 83 return -1; 84 break; 85 case LBRA: 86 tlp->se.m[inst->u1.subid].s.rsp = s; 87 continue; 88 case RBRA: 89 tlp->se.m[inst->u1.subid].e.rep = s; 90 continue; 91 case ANY: 92 if(r != '\n') 93 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 94 return -1; 95 break; 96 case ANYNL: 97 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 98 return -1; 99 break; 100 case BOL: 101 if(s == bol || *(s-1) == '\n') 102 continue; 103 break; 104 case EOL: 105 if(s == j->reol || r == 0 || r == '\n') 106 continue; 107 break; 108 case CCLASS: 109 ep = inst->u1.cp->end; 110 for(rp = inst->u1.cp->spans; rp < ep; rp += 2) 111 if(r >= rp[0] && r <= rp[1]){ 112 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 113 return -1; 114 break; 115 } 116 break; 117 case NCCLASS: 118 ep = inst->u1.cp->end; 119 for(rp = inst->u1.cp->spans; rp < ep; rp += 2) 120 if(r >= rp[0] && r <= rp[1]) 121 break; 122 if(rp == ep) 123 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 124 return -1; 125 break; 126 case OR: 127 /* evaluate right choice later */ 128 if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle) 129 return -1; 130 /* efficiency: advance and re-evaluate */ 131 continue; 132 case END: /* Match! */ 133 match = 1; 134 tlp->se.m[0].e.rep = s; 135 if(mp != 0) 136 _renewmatch(mp, ms, &tlp->se); 137 break; 138 } 139 break; 140 } 141 } 142 if(s == j->reol) 143 break; 144 checkstart = j->startchar && nl->inst==0; 145 s++; 146 }while(r); 147 return match; 148 } 149 150 static int 151 rregexec2(Reprog *progp, /* program to run */ 152 Rune *bol, /* string to run machine on */ 153 Resub *mp, /* subexpression elements */ 154 int ms, /* number of elements at mp */ 155 Reljunk *j 156 ) 157 { 158 Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE]; 159 160 /* mark space */ 161 j->relist[0] = relist0; 162 j->relist[1] = relist1; 163 j->reliste[0] = relist0 + nelem(relist0) - 2; 164 j->reliste[1] = relist1 + nelem(relist1) - 2; 165 166 return rregexec1(progp, bol, mp, ms, j); 167 } 168 169 extern int 170 rregexec(Reprog *progp, /* program to run */ 171 Rune *bol, /* string to run machine on */ 172 Resub *mp, /* subexpression elements */ 173 int ms) /* number of elements at mp */ 174 { 175 Reljunk j; 176 Relist relist0[LISTSIZE], relist1[LISTSIZE]; 177 int rv; 178 179 /* 180 * use user-specified starting/ending location if specified 181 */ 182 j.rstarts = bol; 183 j.reol = 0; 184 if(mp && ms>0){ 185 if(mp->s.sp) 186 j.rstarts = mp->s.rsp; 187 if(mp->e.ep) 188 j.reol = mp->e.rep; 189 } 190 j.starttype = 0; 191 j.startchar = 0; 192 if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) { 193 j.starttype = RUNE; 194 j.startchar = progp->startinst->u1.r; 195 } 196 if(progp->startinst->type == BOL) 197 j.starttype = BOL; 198 199 /* mark space */ 200 j.relist[0] = relist0; 201 j.relist[1] = relist1; 202 j.reliste[0] = relist0 + nelem(relist0) - 2; 203 j.reliste[1] = relist1 + nelem(relist1) - 2; 204 205 rv = rregexec1(progp, bol, mp, ms, &j); 206 if(rv >= 0) 207 return rv; 208 rv = rregexec2(progp, bol, mp, ms, &j); 209 if(rv >= 0) 210 return rv; 211 return -1; 212 }