diff --git a/sys/man/2/regexp b/sys/man/2/regexp index 7760dfe0e..a4b58969b 100644 --- a/sys/man/2/regexp +++ b/sys/man/2/regexp @@ -210,6 +210,8 @@ if is not matched. .SH BUGS There is no way to specify or match a NUL character; NULs terminate patterns and strings. +The size of a character class and the number of sub-expression matches are hard-coded +limits. The library uses the worst-case space estimate for allocating VM runtime threads. .SH HISTORY .IR Regexp (2) first appeared in Plan 9 from Bell Labs. This implementation was written from diff --git a/sys/src/libregexp/regcomp.c b/sys/src/libregexp/regcomp.c index 3bbc26ca9..094f94779 100644 --- a/sys/src/libregexp/regcomp.c +++ b/sys/src/libregexp/regcomp.c @@ -308,7 +308,7 @@ getnextr(Parselex *l) { l->literal = 0; if(l->done) { - l->rune = 0; + l->rune = L'\0'; return; } l->rawexp += chartorune(&l->rune, l->rawexp); @@ -327,7 +327,7 @@ getnextrlit(Parselex *l) l->literal = 1; if(l->done) { l->literal = 0; - l->rune = 0; + l->rune = L'\0'; return; } l->rawexp += chartorune(&l->rune, l->rawexp); @@ -347,7 +347,7 @@ lex(Parselex *l) if(l->literal) return l->peeklex = LRUNE; switch(l->rune){ - case 0: + case L'\0': return l->peeklex = LEND; case L'*': case L'?': @@ -375,16 +375,20 @@ lex(Parselex *l) static int pcmp(void *va, void *vb) { - vlong n; Rune *a, *b; a = va; b = vb; - n = (vlong)b[0] - (vlong)a[0]; - if(n) - return n; - return (vlong)b[1] - (vlong)a[1]; + if(a[0] < b[0]) + return 1; + if(a[0] > b[0]) + return -1; + if(a[1] < b[1]) + return 1; + if(a[1] > b[1]) + return -1; + return 0; } static void @@ -460,7 +464,7 @@ getclass(Parselex *l) q[2] = 0; } -/* classes are in descending order */ +/* classes are in descending order see pcmp */ static Renode* buildclassn(Parselex *l) { diff --git a/sys/src/libregexp/regimpl.h b/sys/src/libregexp/regimpl.h index 8785a27f0..8c8ac03a7 100644 --- a/sys/src/libregexp/regimpl.h +++ b/sys/src/libregexp/regimpl.h @@ -23,7 +23,7 @@ enum { TSTAR, TSUB, - NSUBEXPM = 32 + NSUBEXPM = 32, }; typedef struct Parselex Parselex; @@ -31,30 +31,22 @@ typedef struct Renode Renode; struct Parselex { /* Parse */ - Renode *next; - Renode *nodes; - int sub; - int instrs; + Renode *next, *nodes; + int sub, instrs; jmp_buf exitenv; /* Lex */ void (*getnextr)(Parselex*); - char *rawexp; - char *orig; + char *rawexp, *orig; Rune rune; - Rune peek; - int peeklex; - int done; - int literal; + int peek, peeklex, done, literal, nc; Rune cpairs[400+2]; - int nc; }; struct Renode { int op; Renode *left; Rune r; - union - { + union { Rune r1; int sub; Renode *right; @@ -73,13 +65,11 @@ struct Reinst { char op; int gen; Reinst *a; - union - { + union { Rune r; int sub; }; - union - { + union { Rune r1; Reinst *b; };