diff --git a/sys/src/libregexp/regcomp.c b/sys/src/libregexp/regcomp.c index ad3b8cd96..3e3ba6ae5 100644 --- a/sys/src/libregexp/regcomp.c +++ b/sys/src/libregexp/regcomp.c @@ -156,17 +156,6 @@ initplex(Parselex *plex, char *regstr, int lit) return plex; } -static int -maxthreads(Renode *tree) -{ - tree = tree->left; - if(tree->op == TCAT) - tree = tree->left; - if(tree->op == TBOL) - return 2; - return -1; -} - static Reprog* regcomp1(char *regstr, int nl, int lit) { @@ -187,7 +176,7 @@ regcomp1(char *regstr, int nl, int lit) return nil; } - maxthr = regstrlen; + maxthr = regstrlen + 1; parsetr = node(&plex, TSUB, e0(&plex), nil); // prtree(parsetr, 0, 1); @@ -304,12 +293,13 @@ Tailcall: static Reinst* compile(Renode *parsetr, Reprog *reprog, int nl) { - Reinst *reinst; + Reinst *reinst, *end; int sub; sub = 0; reinst = (Reinst*)(reprog+1); - compile1(parsetr, reinst, &sub, nl); + end = compile1(parsetr, reinst, &sub, nl); + assert(reinst + reprog->len == end); return reinst; } diff --git a/sys/src/libregexp/regexec.c b/sys/src/libregexp/regexec.c index 9fa9a1229..e085abc2a 100644 --- a/sys/src/libregexp/regexec.c +++ b/sys/src/libregexp/regexec.c @@ -4,30 +4,31 @@ #include "regimpl.h" typedef struct RethreadQ RethreadQ; -struct RethreadQ -{ +struct RethreadQ { Rethread *head; Rethread **tail; }; int -regexec(Reprog *prog, char *str, Resub *sem, int msize) +regexec(Reprog *p, char *str, Resub *sem, int msize) { RethreadQ lists[2], *clist, *nlist, *tmp; - Rethread *t, *next, *pooltop, *avail; - Reinst *curinst; + Rethread *t, *next, *pool, *avail; + Reinst *ci; Rune r; char *sp, *ep, endc; - int i, match, first, gen, matchpri, pri; + int i, matchgen, gen; if(msize > NSUBEXPM) msize = NSUBEXPM; - if(prog->startinst->gen != 0) { - for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++) - curinst->gen = 0; + if(p->startinst->gen != 0) { + for(ci = p->startinst; ci < p->startinst + p->len; ci++) + ci->gen = 0; } + memset(p->threads, 0, sizeof(Rethread)*p->nthr); + clist = lists; clist->head = nil; clist->tail = &clist->head; @@ -35,10 +36,10 @@ regexec(Reprog *prog, char *str, Resub *sem, int msize) nlist->head = nil; nlist->tail = &nlist->head; - pooltop = prog->threads + prog->nthr; + pool = p->threads; avail = nil; - pri = matchpri = gen = match = 0; + gen = matchgen = 0; sp = str; ep = nil; endc = '\0'; @@ -51,109 +52,120 @@ regexec(Reprog *prog, char *str, Resub *sem, int msize) *sem->ep = '\0'; } } - r = Runemax + 1; - for(; r != L'\0'; sp += i) { - gen++; + for(r = 1; r != L'\0'; sp += i) { i = chartorune(&r, sp); - first = 1; + gen++; + if(matchgen == 0) { + if(avail == nil) { + assert(pool < p->threads + p->nthr); + t = pool++; + } else { + t = avail; + avail = avail->next; + } + t->i = p->startinst; + if(msize > 0) + memset(t->sem, 0, sizeof(Resub)*msize); + t->next = nil; + t->gen = gen; + *clist->tail = t; + clist->tail = &t->next; + } t = clist->head; if(t == nil) - goto Start; - curinst = t->pc; + break; + ci = t->i; Again: - if(curinst->gen == gen) + if(ci->gen == gen || matchgen && t->gen > matchgen) goto Done; - curinst->gen = gen; - switch(curinst->op) { + ci->gen = gen; + switch(ci->op) { case ORUNE: - if(r != curinst->r) + if(r != ci->r) goto Done; case OANY: /* fallthrough */ next = t->next; - t->pc = curinst + 1; + t->i = ci + 1; t->next = nil; *nlist->tail = t; nlist->tail = &t->next; if(next == nil) break; t = next; - curinst = t->pc; + ci = t->i; goto Again; case OCLASS: Class: - if(r < curinst->r) + if(r < ci->r) goto Done; - if(r > curinst->r1) { - curinst++; + if(r > ci->r1) { + ci++; goto Class; } next = t->next; - t->pc = curinst->a; + t->i = ci->a; t->next = nil; *nlist->tail = t; nlist->tail = &t->next; if(next == nil) break; t = next; - curinst = t->pc; + ci = t->i; goto Again; case ONOTNL: if(r != L'\n') { - curinst++; + ci++; goto Again; } goto Done; case OBOL: if(sp == str || sp[-1] == '\n') { - curinst++; + ci++; goto Again; } goto Done; case OEOL: if(r == L'\n' || r == L'\0' && ep == nil) { - curinst++; + ci++; goto Again; } goto Done; case OJMP: - curinst = curinst->a; + ci = ci->a; goto Again; case OSPLIT: - if(avail == nil) - next = --pooltop; - else { + if(avail == nil) { + assert(pool < p->threads + p->nthr); + next = pool++; + } else { next = avail; avail = avail->next; } - next->pc = curinst->b; + next->i = ci->b; if(msize > 0) memcpy(next->sem, t->sem, sizeof(Resub)*msize); - next->pri = t->pri; next->next = t->next; + next->gen = t->gen; t->next = next; - curinst = curinst->a; + ci = ci->a; goto Again; case OSAVE: - if(curinst->sub < msize) - t->sem[curinst->sub].sp = sp; - curinst++; + if(ci->sub < msize) + t->sem[ci->sub].sp = sp; + ci++; goto Again; case OUNSAVE: - if(curinst->sub == 0) { - /* "Highest" priority is the left-most longest. */ - if (t->pri > matchpri) - goto Done; - match = 1; - matchpri = t->pri; + if(ci->sub == 0) { + matchgen = t->gen; if(sem != nil && msize > 0) { memcpy(sem, t->sem, sizeof(Resub)*msize); sem->ep = sp; } goto Done; } - if(curinst->sub < msize) - t->sem[curinst->sub].ep = sp; - curinst++; + if(ci->sub < msize) + t->sem[ci->sub].ep = sp; + ci++; goto Again; Done: next = t->next; @@ -162,30 +174,9 @@ Again: if(next == nil) break; t = next; - curinst = t->pc; + ci = t->i; goto Again; } -Start: - /* Start again once if we haven't found anything. */ - if(first == 1 && match == 0) { - first = 0; - if(avail == nil) - t = --pooltop; - else { - t = avail; - avail = avail->next; - } - if(msize > 0) - memset(t->sem, 0, sizeof(Resub)*msize); - /* "Lower" priority thread */ - t->pri = matchpri = pri++; - t->next = nil; - curinst = prog->startinst; - goto Again; - } - /* If we have a match and no extant threads, we are done. */ - if(match == 1 && nlist->head == nil) - break; tmp = clist; clist = nlist; nlist = tmp; @@ -194,5 +185,5 @@ Start: } if(ep != nil) *ep = endc; - return match; + return matchgen > 0 ? 1 : 0; } diff --git a/sys/src/libregexp/regimpl.h b/sys/src/libregexp/regimpl.h index f173a970c..8785a27f0 100644 --- a/sys/src/libregexp/regimpl.h +++ b/sys/src/libregexp/regimpl.h @@ -1,5 +1,4 @@ -enum -{ +enum { LANY = 0, LBOL, LCLASS, @@ -30,8 +29,7 @@ enum typedef struct Parselex Parselex; typedef struct Renode Renode; -struct Parselex -{ +struct Parselex { /* Parse */ Renode *next; Renode *nodes; @@ -50,8 +48,8 @@ struct Parselex Rune cpairs[400+2]; int nc; }; -struct Renode -{ + +struct Renode { int op; Renode *left; Rune r; @@ -63,15 +61,15 @@ struct Renode }; int nclass; }; -struct Rethread -{ - Reinst *pc; + +struct Rethread { + Reinst *i; Resub sem[NSUBEXPM]; - int pri; Rethread *next; + int gen; }; -struct Reinst -{ + +struct Reinst { char op; int gen; Reinst *a; diff --git a/sys/src/libregexp/rregexec.c b/sys/src/libregexp/rregexec.c index b1320de4b..663bf4569 100644 --- a/sys/src/libregexp/rregexec.c +++ b/sys/src/libregexp/rregexec.c @@ -4,29 +4,30 @@ #include "regimpl.h" typedef struct RethreadQ RethreadQ; -struct RethreadQ -{ +struct RethreadQ { Rethread *head; Rethread **tail; }; int -rregexec(Reprog *prog, Rune *str, Resub *sem, int msize) +rregexec(Reprog *p, Rune *str, Resub *sem, int msize) { RethreadQ lists[2], *clist, *nlist, *tmp; - Rethread *t, *next, *pooltop, *avail; - Reinst *curinst; - Rune *rsp, *rep, endr, last; - int match, first, gen, pri, matchpri; + Rethread *t, *next, *pool, *avail; + Reinst *ci; + Rune *rsp, *rep, endr, r; + int matchgen, gen; if(msize > NSUBEXPM) msize = NSUBEXPM; - if(prog->startinst->gen != 0) { - for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++) - curinst->gen = 0; + if(p->startinst->gen != 0) { + for(ci = p->startinst; ci < p->startinst + p->len; ci++) + ci->gen = 0; } + memset(p->threads, 0, sizeof(Rethread)*p->nthr); + clist = lists; clist->head = nil; clist->tail = &clist->head; @@ -34,10 +35,10 @@ rregexec(Reprog *prog, Rune *str, Resub *sem, int msize) nlist->head = nil; nlist->tail = &nlist->head; - pooltop = prog->threads + prog->nthr; + pool = p->threads; avail = nil; - pri = matchpri = gen = match = 0; + gen = matchgen = 0; rsp = str; rep = nil; endr = L'\0'; @@ -50,109 +51,120 @@ rregexec(Reprog *prog, Rune *str, Resub *sem, int msize) *sem->rep = '\0'; } } - last = 1; - for(; last != L'\0'; rsp++) { + for(r = 1; r != L'\0'; rsp++) { + r = *rsp; gen++; - last = *rsp; - first = 1; + if(matchgen == 0) { + if(avail == nil) { + assert(pool < p->threads + p->nthr); + t = pool++; + } else { + t = avail; + avail = avail->next; + } + t->i = p->startinst; + if(msize > 0) + memset(t->sem, 0, sizeof(Resub)*msize); + t->next = nil; + t->gen = gen; + *clist->tail = t; + clist->tail = &t->next; + } t = clist->head; if(t == nil) - goto Start; - curinst = t->pc; + break; + ci = t->i; Again: - if(curinst->gen == gen) + if(ci->gen == gen || matchgen && t->gen > matchgen) goto Done; - curinst->gen = gen; - switch(curinst->op) { + ci->gen = gen; + switch(ci->op) { case ORUNE: - if(*rsp != curinst->r) + if(r != ci->r) goto Done; case OANY: /* fallthrough */ next = t->next; - t->pc = curinst + 1; + t->i = ci + 1; t->next = nil; *nlist->tail = t; nlist->tail = &t->next; if(next == nil) break; t = next; - curinst = t->pc; + ci = t->i; goto Again; case OCLASS: Class: - if(*rsp < curinst->r) + if(r < ci->r) goto Done; - if(*rsp > curinst->r1) { - curinst++; + if(r > ci->r1) { + ci++; goto Class; } next = t->next; - t->pc = curinst->a; + t->i = ci->a; t->next = nil; *nlist->tail = t; nlist->tail = &t->next; if(next == nil) break; t = next; - curinst = t->pc; + ci = t->i; goto Again; case ONOTNL: - if(*rsp != L'\n') { - curinst++; + if(r != L'\n') { + ci++; goto Again; } goto Done; case OBOL: if(rsp == str || rsp[-1] == L'\n') { - curinst++; + ci++; goto Again; } goto Done; case OEOL: - if(*rsp == '\n' || *rsp == L'\0' && rep == nil) { - curinst++; + if(r == L'\n' || r == L'\0' && rep == nil) { + ci++; goto Again; } goto Done; case OJMP: - curinst = curinst->a; + ci = ci->a; goto Again; case OSPLIT: - if(avail == nil) - next = --pooltop; - else { + if(avail == nil) { + assert(pool < p->threads + p->nthr); + next = pool++; + } else { next = avail; avail = avail->next; } - next->pc = curinst->b; + next->i = ci->b; if(msize > 0) memcpy(next->sem, t->sem, sizeof(Resub)*msize); - next->pri = t->pri; next->next = t->next; + next->gen = t->gen; t->next = next; - curinst = curinst->a; + ci = ci->a; goto Again; case OSAVE: - if(curinst->sub < msize) - t->sem[curinst->sub].rsp = rsp; - curinst++; + if(ci->sub < msize) + t->sem[ci->sub].rsp = rsp; + ci++; goto Again; case OUNSAVE: - if(curinst->sub == 0) { - /* "Highest" priority is the left-most longest. */ - if (t->pri > matchpri) - goto Done; - match = 1; - matchpri = t->pri; + if(ci->sub == 0) { + matchgen = t->gen; if(sem != nil && msize > 0) { memcpy(sem, t->sem, sizeof(Resub)*msize); sem->rep = rsp; } goto Done; } - if(curinst->sub < msize) - t->sem[curinst->sub].rep = rsp; - curinst++; + if(ci->sub < msize) + t->sem[ci->sub].rep = rsp; + ci++; goto Again; Done: next = t->next; @@ -161,30 +173,9 @@ Again: if(next == nil) break; t = next; - curinst = t->pc; + ci = t->i; goto Again; } -Start: - /* Start again once if we haven't found anything. */ - if(first == 1 && match == 0) { - first = 0; - if(avail == nil) - t = --pooltop; - else { - t = avail; - avail = avail->next; - } - if(msize > 0) - memset(t->sem, 0, sizeof(Resub)*msize); - /* "Lower" priority thread */ - t->pri = matchpri = pri++; - t->next = nil; - curinst = prog->startinst; - goto Again; - } - /* If we have a match and no extant threads, we are done. */ - if(match == 1 && nlist->head == nil) - break; tmp = clist; clist = nlist; nlist = tmp; @@ -193,5 +184,5 @@ Start: } if(rep != nil) *rep = endr; - return match; + return matchgen > 0 ? 1 : 0; }