grep: fix wrong rclass splitting (thanks erik and kenji)

add 0xffff to tab1 and tab2 as runes above 0xffff (0x10000-0x10ffff) have 4 byte utf-8 sequences.
use Runemax (0x10ffff) instead of Runemask (0x1fffff) to denote
the last valid rune for inverted [^] match as Runemask is out of the
valid rune space.
This commit is contained in:
cinap_lenrek 2014-03-30 04:29:04 +02:00
parent cde97a4d5f
commit 4d0a446123

View file

@ -135,11 +135,13 @@ Rune tab1[] =
{ {
0x007f, 0x007f,
0x07ff, 0x07ff,
0xffff,
}; };
Rune tab2[] = Rune tab2[] =
{ {
0x003f, 0x003f,
0x0fff, 0x0fff,
0xffff,
}; };
Re2 Re2
@ -275,7 +277,7 @@ re2class(char *s)
x = re2or(x, rclass(ov, p[0]-1)); x = re2or(x, rclass(ov, p[0]-1));
ov = p[1]+1; ov = p[1]+1;
} }
x = re2or(x, rclass(ov, Runemask)); x = re2or(x, rclass(ov, Runemax));
} else { } else {
x = rclass(p[0], p[1]); x = rclass(p[0], p[1]);
for(p+=2; *p; p+=2) for(p+=2; *p; p+=2)