grep: fix wrong rclass splitting (thanks erik and kenji)
Add 0xffff to tab1, since runes in the range 0x10000-0x10ffff are encoded as 4-byte UTF-8 sequences and 0xffff is the last rune with a 3-byte encoding. Use Runemax (0x10ffff) instead of Runemask (0x1fffff) to denote the last valid rune for an inverted [^] match, because Runemask lies outside the valid rune space.
This commit is contained in:
parent
cde97a4d5f
commit
4d0a446123
1 changed file with 3 additions and 1 deletion
|
@ -135,11 +135,13 @@ Rune tab1[] =
|
|||
{
|
||||
0x007f,
|
||||
0x07ff,
|
||||
0xffff,
|
||||
};
|
||||
Rune tab2[] =
|
||||
{
|
||||
0x003f,
|
||||
0x0fff,
|
||||
0xffff,
|
||||
};
|
||||
|
||||
Re2
|
||||
|
@ -275,7 +277,7 @@ re2class(char *s)
|
|||
x = re2or(x, rclass(ov, p[0]-1));
|
||||
ov = p[1]+1;
|
||||
}
|
||||
x = re2or(x, rclass(ov, Runemask));
|
||||
x = re2or(x, rclass(ov, Runemax));
|
||||
} else {
|
||||
x = rclass(p[0], p[1]);
|
||||
for(p+=2; *p; p+=2)
|
||||
|
|
Loading…
Reference in a new issue