287 lines
4.2 KiB
C
287 lines
4.2 KiB
C
#include "grep.h"
|
|
|
|
/*
|
|
* incremental compiler.
|
|
* add the branch c to the
|
|
* state s.
|
|
*/
|
|
void
|
|
increment(State *s, int c)
|
|
{
|
|
int i;
|
|
State *t, **tt;
|
|
Re *re1, *re2;
|
|
|
|
nfollow = 0;
|
|
gen++;
|
|
matched = 0;
|
|
for(i=0; i<s->count; i++)
|
|
fol1(s->re[i], c);
|
|
qsort(follow, nfollow, sizeof(*follow), fcmp);
|
|
for(tt=&state0; t = *tt;) {
|
|
if(t->count > nfollow) {
|
|
tt = &t->linkleft;
|
|
goto cont;
|
|
}
|
|
if(t->count < nfollow) {
|
|
tt = &t->linkright;
|
|
goto cont;
|
|
}
|
|
for(i=0; i<nfollow; i++) {
|
|
re1 = t->re[i];
|
|
re2 = follow[i];
|
|
if(re1 > re2) {
|
|
tt = &t->linkleft;
|
|
goto cont;
|
|
}
|
|
if(re1 < re2) {
|
|
tt = &t->linkright;
|
|
goto cont;
|
|
}
|
|
}
|
|
if(!!matched && !t->match) {
|
|
tt = &t->linkleft;
|
|
goto cont;
|
|
}
|
|
if(!matched && !!t->match) {
|
|
tt = &t->linkright;
|
|
goto cont;
|
|
}
|
|
s->next[c] = t;
|
|
return;
|
|
cont:;
|
|
}
|
|
|
|
t = sal(nfollow);
|
|
*tt = t;
|
|
for(i=0; i<nfollow; i++) {
|
|
re1 = follow[i];
|
|
t->re[i] = re1;
|
|
}
|
|
s->next[c] = t;
|
|
t->match = matched;
|
|
}
|
|
|
|
int
|
|
fcmp(void *va, void *vb)
|
|
{
|
|
Re **aa, **bb;
|
|
Re *a, *b;
|
|
|
|
aa = va;
|
|
bb = vb;
|
|
a = *aa;
|
|
b = *bb;
|
|
if(a > b)
|
|
return 1;
|
|
if(a < b)
|
|
return -1;
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
fol1(Re *r, int c)
|
|
{
|
|
Re *r1;
|
|
|
|
loop:
|
|
if(r->gen == gen)
|
|
return;
|
|
if(nfollow >= maxfollow)
|
|
error("nfollow");
|
|
r->gen = gen;
|
|
switch(r->type) {
|
|
default:
|
|
error("fol1");
|
|
|
|
case Tcase:
|
|
if(c >= 0 && c < 256)
|
|
if(r1 = r->cases[c])
|
|
follow[nfollow++] = r1;
|
|
if(r = r->next)
|
|
goto loop;
|
|
break;
|
|
|
|
case Talt:
|
|
case Tor:
|
|
fol1(r->alt, c);
|
|
r = r->next;
|
|
goto loop;
|
|
|
|
case Tbegin:
|
|
if(c == '\n' || c == Cbegin)
|
|
follow[nfollow++] = r->next;
|
|
break;
|
|
|
|
case Tend:
|
|
if(c == '\n') {
|
|
if(r->next == 0) {
|
|
matched = 1;
|
|
break;
|
|
}
|
|
r = r->next;
|
|
goto loop;
|
|
}
|
|
break;
|
|
|
|
case Tclass:
|
|
if(c >= r->lo && c <= r->hi)
|
|
follow[nfollow++] = r->next;
|
|
break;
|
|
}
|
|
}
|
|
|
|
int tab1[] =
|
|
{
|
|
0x007f,
|
|
0x07ff,
|
|
0xffff,
|
|
};
|
|
int tab2[] =
|
|
{
|
|
0x003f,
|
|
0x0fff,
|
|
0x3ffff,
|
|
};
|
|
|
|
Re2
|
|
rclass(Rune p0, Rune p1)
|
|
{
|
|
char xc0[6], xc1[6];
|
|
int i, n, m;
|
|
Re2 x;
|
|
|
|
if(p0 > p1)
|
|
return re2char(0xff, 0xff); // no match
|
|
|
|
/*
|
|
* bust range into same length
|
|
* character sequences
|
|
*/
|
|
for(i=0; i<nelem(tab1); i++) {
|
|
m = tab1[i];
|
|
if(p0 <= m && p1 > m)
|
|
return re2or(rclass(p0, m), rclass(m+1, p1));
|
|
}
|
|
|
|
/*
|
|
* bust range into part of a single page
|
|
* or into full pages
|
|
*/
|
|
for(i=0; i<nelem(tab2); i++) {
|
|
m = tab2[i];
|
|
if((p0 & ~m) != (p1 & ~m)) {
|
|
if((p0 & m) != 0)
|
|
return re2or(rclass(p0, p0|m), rclass((p0|m)+1, p1));
|
|
if((p1 & m) != m)
|
|
return re2or(rclass(p0, (p1&~m)-1), rclass(p1&~m, p1));
|
|
}
|
|
}
|
|
|
|
n = runetochar(xc0, &p0);
|
|
i = runetochar(xc1, &p1);
|
|
if(i != n)
|
|
error("length");
|
|
|
|
x = re2char(xc0[0], xc1[0]);
|
|
for(i=1; i<n; i++)
|
|
x = re2cat(x, re2char(xc0[i], xc1[i]));
|
|
return x;
|
|
}
|
|
|
|
int
|
|
pcmp(void *va, void *vb)
|
|
{
|
|
int n;
|
|
Rune *a, *b;
|
|
|
|
a = va;
|
|
b = vb;
|
|
|
|
n = a[0] - b[0];
|
|
if(n)
|
|
return n;
|
|
return a[1] - b[1];
|
|
}
|
|
|
|
/*
|
|
* convert character chass into
|
|
* run-pair ranges of matches.
|
|
* this is 10646/utf specific and
|
|
* needs to be changed for some
|
|
* other input character set.
|
|
* this is the key to a fast
|
|
* regular search of characters
|
|
* by looking at sequential bytes.
|
|
*/
|
|
Re2
|
|
re2class(char *s)
|
|
{
|
|
Rune pairs[200+2], *p, *q, ov;
|
|
int nc;
|
|
Re2 x;
|
|
|
|
nc = 0;
|
|
if(*s == '^') {
|
|
nc = 1;
|
|
s++;
|
|
}
|
|
|
|
p = pairs;
|
|
s += chartorune(p, s);
|
|
for(;;) {
|
|
if(*p == '\\')
|
|
s += chartorune(p, s);
|
|
if(*p == 0)
|
|
break;
|
|
p[1] = *p;
|
|
p += 2;
|
|
if(p >= pairs + nelem(pairs) - 2)
|
|
error("class too big");
|
|
s += chartorune(p, s);
|
|
if(*p != '-')
|
|
continue;
|
|
s += chartorune(p, s);
|
|
if(*p == '\\')
|
|
s += chartorune(p, s);
|
|
if(*p == 0)
|
|
break;
|
|
p[-1] = *p;
|
|
s += chartorune(p, s);
|
|
}
|
|
*p = 0;
|
|
qsort(pairs, (p-pairs)/2, 2*sizeof(*pairs), pcmp);
|
|
|
|
q = pairs;
|
|
for(p=pairs+2; *p; p+=2) {
|
|
if(p[0] > p[1])
|
|
continue;
|
|
if(p[0] > q[1] || p[1] < q[0]) {
|
|
q[2] = p[0];
|
|
q[3] = p[1];
|
|
q += 2;
|
|
continue;
|
|
}
|
|
if(p[0] < q[0])
|
|
q[0] = p[0];
|
|
if(p[1] > q[1])
|
|
q[1] = p[1];
|
|
}
|
|
q[2] = 0;
|
|
|
|
p = pairs;
|
|
if(nc) {
|
|
x = rclass(0, p[0]-1);
|
|
ov = p[1]+1;
|
|
for(p+=2; *p; p+=2) {
|
|
x = re2or(x, rclass(ov, p[0]-1));
|
|
ov = p[1]+1;
|
|
}
|
|
x = re2or(x, rclass(ov, Runemax));
|
|
} else {
|
|
x = rclass(p[0], p[1]);
|
|
for(p+=2; *p; p+=2)
|
|
x = re2or(x, rclass(p[0], p[1]));
|
|
}
|
|
return x;
|
|
}
|