hbase

heirloom base
git clone git://git.2f30.org/hbase
Log | Files | Refs | README

rregexec.c (4729B)


      1 #include "lib9.h"
      2 #include "regexp9.h"
      3 #include "regcomp.h"
      4 
      5 /*
      6  *  return	0 if no match
      7  *		>0 if a match
      8  *		<0 if we ran out of _relist space
      9  */
     10 static int
     11 rregexec1(Reprog *progp,	/* program to run */
     12 	Rune *bol,		/* string to run machine on */
     13 	Resub *mp,		/* subexpression elements */
     14 	int ms,			/* number of elements at mp */
     15 	Reljunk *j)
     16 {
     17 	int flag=0;
     18 	Reinst *inst;
     19 	Relist *tlp;
     20 	Rune *s;
     21 	int i, checkstart;
     22 	Rune r, *rp, *ep;
     23 	Relist* tl;		/* This list, next list */
     24 	Relist* nl;
     25 	Relist* tle;		/* ends of this and next list */
     26 	Relist* nle;
     27 	int match;
     28 	Rune *p;
     29 
     30 	match = 0;
     31 	checkstart = j->startchar;
     32 	if(mp)
     33 		for(i=0; i<ms; i++) {
     34 			mp[i].s.rsp = 0;
     35 			mp[i].e.rep = 0;
     36 		}
     37 	j->relist[0][0].inst = 0;
     38 	j->relist[1][0].inst = 0;
     39 
     40 	/* Execute machine once for each character, including terminal NUL */
     41 	s = j->rstarts;
     42 	do{
     43 
     44 		/* fast check for first char */
     45 		if(checkstart) {
     46 			switch(j->starttype) {
     47 			case RUNE:
     48 				p = runestrchr(s, j->startchar);
     49 				if(p == 0 || p == j->reol)
     50 					return match;
     51 				s = p;
     52 				break;
     53 			case BOL:
     54 				if(s == bol)
     55 					break;
     56 				p = runestrchr(s, '\n');
     57 				if(p == 0 || s == j->reol)
     58 					return match;
     59 				s = p+1;
     60 				break;
     61 			}
     62 		}
     63 
     64 		r = *s;
     65 
     66 		/* switch run lists */
     67 		tl = j->relist[flag];
     68 		tle = j->reliste[flag];
     69 		nl = j->relist[flag^=1];
     70 		nle = j->reliste[flag];
     71 		nl->inst = 0;
     72 
     73 		/* Add first instruction to current list */
     74 		_rrenewemptythread(tl, progp->startinst, ms, s);
     75 
     76 		/* Execute machine until current list is empty */
     77 		for(tlp=tl; tlp->inst; tlp++){
     78 			for(inst=tlp->inst; ; inst = inst->u2.next){
     79 				switch(inst->type){
     80 				case RUNE:	/* regular character */
     81 					if(inst->u1.r == r)
     82 						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
     83 							return -1;
     84 					break;
     85 				case LBRA:
     86 					tlp->se.m[inst->u1.subid].s.rsp = s;
     87 					continue;
     88 				case RBRA:
     89 					tlp->se.m[inst->u1.subid].e.rep = s;
     90 					continue;
     91 				case ANY:
     92 					if(r != '\n')
     93 						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
     94 							return -1;
     95 					break;
     96 				case ANYNL:
     97 					if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
     98 							return -1;
     99 					break;
    100 				case BOL:
    101 					if(s == bol || *(s-1) == '\n')
    102 						continue;
    103 					break;
    104 				case EOL:
    105 					if(s == j->reol || r == 0 || r == '\n')
    106 						continue;
    107 					break;
    108 				case CCLASS:
    109 					ep = inst->u1.cp->end;
    110 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
    111 						if(r >= rp[0] && r <= rp[1]){
    112 							if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
    113 								return -1;
    114 							break;
    115 						}
    116 					break;
    117 				case NCCLASS:
    118 					ep = inst->u1.cp->end;
    119 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
    120 						if(r >= rp[0] && r <= rp[1])
    121 							break;
    122 					if(rp == ep)
    123 						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
    124 							return -1;
    125 					break;
    126 				case OR:
    127 					/* evaluate right choice later */
    128 					if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
    129 						return -1;
    130 					/* efficiency: advance and re-evaluate */
    131 					continue;
    132 				case END:	/* Match! */
    133 					match = 1;
    134 					tlp->se.m[0].e.rep = s;
    135 					if(mp != 0)
    136 						_renewmatch(mp, ms, &tlp->se);
    137 					break;
    138 				}
    139 				break;
    140 			}
    141 		}
    142 		if(s == j->reol)
    143 			break;
    144 		checkstart = j->startchar && nl->inst==0;
    145 		s++;
    146 	}while(r);
    147 	return match;
    148 }
    149 
    150 static int
    151 rregexec2(Reprog *progp,	/* program to run */
    152 	Rune *bol,	/* string to run machine on */
    153 	Resub *mp,	/* subexpression elements */
    154 	int ms,		/* number of elements at mp */
    155 	Reljunk *j
    156 )
    157 {
    158 	Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
    159 
    160 	/* mark space */
    161 	j->relist[0] = relist0;
    162 	j->relist[1] = relist1;
    163 	j->reliste[0] = relist0 + nelem(relist0) - 2;
    164 	j->reliste[1] = relist1 + nelem(relist1) - 2;
    165 
    166 	return rregexec1(progp, bol, mp, ms, j);
    167 }
    168 
    169 extern int
    170 rregexec(Reprog *progp,	/* program to run */
    171 	Rune *bol,	/* string to run machine on */
    172 	Resub *mp,	/* subexpression elements */
    173 	int ms)		/* number of elements at mp */
    174 {
    175 	Reljunk j;
    176 	Relist relist0[LISTSIZE], relist1[LISTSIZE];
    177 	int rv;
    178 
    179 	/*
    180  	 *  use user-specified starting/ending location if specified
    181 	 */
    182 	j.rstarts = bol;
    183 	j.reol = 0;
    184 	if(mp && ms>0){
    185 		if(mp->s.sp)
    186 			j.rstarts = mp->s.rsp;
    187 		if(mp->e.ep)
    188 			j.reol = mp->e.rep;
    189 	}
    190 	j.starttype = 0;
    191 	j.startchar = 0;
    192 	if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
    193 		j.starttype = RUNE;
    194 		j.startchar = progp->startinst->u1.r;
    195 	}
    196 	if(progp->startinst->type == BOL)
    197 		j.starttype = BOL;
    198 
    199 	/* mark space */
    200 	j.relist[0] = relist0;
    201 	j.relist[1] = relist1;
    202 	j.reliste[0] = relist0 + nelem(relist0) - 2;
    203 	j.reliste[1] = relist1 + nelem(relist1) - 2;
    204 
    205 	rv = rregexec1(progp, bol, mp, ms, &j);
    206 	if(rv >= 0)
    207 		return rv;
    208 	rv = rregexec2(progp, bol, mp, ms, &j);
    209 	if(rv >= 0)
    210 		return rv;
    211 	return -1;
    212 }