reactos/sdk/tools/mkisofs/schilytools/libschily/match.c

608 lines
11 KiB
C

/* @(#)match.c 1.27 17/08/13 Copyright 1985, 1995-2017 J. Schilling */
#include <schily/standard.h>
#include <schily/patmatch.h>
#define POSIX_CLASS /* Support [[:alpha:]] by default */
#ifdef NO_POSIX_CLASS /* Allow to disable [[:alpha:]] */
#undef POSIX_CLASS
#endif
#ifdef POSIX_CLASS
#include <schily/wchar.h> /* With [[:alpha:]], we need wctype() */
#include <schily/wctype.h> /* and thus wchar.h and wctype.h */
#endif
/*
* Pattern matching functions
*
* Copyright (c) 1985, 1995-2017 J. Schilling
*/
/*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* See the file CDDL.Schily.txt in this distribution for details.
* A copy of the CDDL is also available via the Internet at
* http://www.opensource.org/licenses/cddl1.txt
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file CDDL.Schily.txt from this distribution.
*/
/*
* The pattern matching functions below are based on the algorithm
* presented by Martin Richards in:
*
* "A Compact Function for Regular Expression Pattern Matching",
* Software-Practice and Experience, Vol. 9, 527-534 (1979)
*
* Several changes have been made to the original source which has been
* written in BCPL:
*
* '/' is replaced by '!' (to allow UNIX filenames)
* '(',')' are replaced by '{', '}'
* '\'' is replaced by '\\' (UNIX compatible quote)
*
* Character classes have been added to allow "[<character list>]"
* to be used.
* POSIX features like [[:alpha:]] have been added.
* Start of line '^' and end of line '$' have been added.
*/
/*
 * Select the names of the exported functions and the character types
 * for this compilation of the file, depending on which of the macros
 * __LINE_MATCH, __WIDE_CHAR and __MB_CHAR are defined.
 *
 * CHAR		is the element type of the string that is matched,
 * PCHAR	is the element type of the pattern.
 */
#undef CHAR
#ifdef __LINE_MATCH
/*
 * Line match variant: the interpreter is named patwlmatch() for wide
 * characters, else opatlmatch()/patlmatch().
 */
#ifdef __WIDE_CHAR
#define patmatch patwlmatch
#else
#define opatmatch opatlmatch
#define patmatch patlmatch
#endif
#endif
#ifdef __WIDE_CHAR
/*
 * Wide character variant: string and pattern both use wchar_t.
 */
#ifndef __LINE_MATCH
#define patcompile patwcompile
#define patmatch patwmatch
#endif
#define CHAR wchar_t
#define PCHAR wchar_t
#endif
#ifdef __MB_CHAR
/*
 * Multi byte variant: only the pattern type is set to wchar_t here,
 * this overrides any name that has been selected for patmatch above.
 */
#undef patmatch
#ifdef __LINE_MATCH
#define patmatch patmblmatch
#else
#define patmatch patmbmatch
#endif
#define PCHAR wchar_t
#endif
/*
 * Everything that has not been selected above defaults to unsigned char.
 * DID_UCHAR_TYPE makes sure that the Uchar typedef appears only once.
 */
#ifndef CHAR
typedef unsigned char Uchar;
#define DID_UCHAR_TYPE
#define CHAR Uchar
#endif
#ifndef PCHAR
#ifndef DID_UCHAR_TYPE
typedef unsigned char Uchar;
#endif
#define PCHAR Uchar
#endif
/*
 * ENDSTATE is the state number that is used for the final state of the
 * interpreter and as the end marker of the lists kept in the aux array.
 */
#define ENDSTATE (-1)
#define CL_SIZE 32 /* Max size for '[: :]' */
/*
* The Interpreter
*/
/*
 * put - enter a state number into the list of active states
 *
 * The list occupies the space from 'state' up to (but not including) 'sp'.
 * If 'n' is not yet a member, it is stored at the end of the list.
 * 'ret' receives the resulting end of the list: this is 'sp' if 'n' was
 * already present and one entry behind 'sp' if it had to be added.
 */
#define	put(ret, state, sp, n)	{ \
	register int	*scan = state; \
	register int	*top = sp; \
	register int	want = n; \
	register int	seen = 0; \
	\
	for (; scan < top; scan++) { \
		if (*scan == want) { \
			seen = 1; \
			break; \
		} \
	} \
	if (!seen) \
		*top++ = want; \
	ret = top; \
}
/*
* match a character in class
*
* Syntax errors do not appear here, they are handled by the compiler,
* so in theory we could remove the "ok = FALSE; goto out" error exits
* from the POSIX class code.
*/
#ifdef POSIX_CLASS
#define CHK_POSIX_CLASS \
else if (*lpat == LCLASS) { \
if (lpat[1] == ':') { \
char class[CL_SIZE+1]; \
char *pc = class; \
\
lpat += 2; /* Eat ':' */ \
for (;;) { \
if (*lpat == '\0') { \
ok = FALSE; \
goto out; \
} \
if (*lpat == ':' && lpat[1] == RCLASS) \
break; \
if (pc >= &class[CL_SIZE]) { \
ok = FALSE; \
goto out; \
} \
*pc++ = *lpat++; \
} \
if (pc == class) { \
ok = FALSE; \
goto out; \
} \
*pc = '\0'; \
lpat += 2; /* Skip ":]" */ \
if (iswctype(lc, wctype(class))) { \
ok = !ok; \
goto out; \
} \
continue; \
} \
}
#else
#define CHK_POSIX_CLASS
#endif
/*
 * in_class - check whether a character is a member of a character class
 *
 *	found	receives the result (TRUE or FALSE)
 *	pat	points to the first pattern character behind the LCLASS
 *		that opens the class
 *	c	the character to check
 *
 * A leading NOT inverts the result: "ok" then starts with TRUE and is
 * flipped to FALSE by the first item that contains the character.
 * The items up to the closing RCLASS are single characters or ranges
 * in the form "lo RANGE hi", a QUOTE in front of a bound is skipped so
 * that the following character is taken literally.
 * CHK_POSIX_CLASS is expanded as the "else" part of the QUOTE check.
 *
 * The loop does not check for the end of the pattern, it relies on a
 * pattern that has been verified by the compiler.
 * As the macro defines the label "out", it can be expanded only once
 * inside of a function.
 */
#define in_class(found, pat, c) { \
register const PCHAR *lpat = pat; \
register int lc = c; \
int lo_bound; \
int hi_bound; \
BOOL ok = FALSE; \
\
if (*lpat == NOT) { \
lpat++; \
ok = TRUE; \
} \
while (*lpat != RCLASS) { \
if (*lpat == QUOTE) \
lpat++; \
CHK_POSIX_CLASS \
lo_bound = *lpat++; \
if (*lpat == RANGE) { \
lpat++; \
if (*lpat == QUOTE) \
lpat++; \
hi_bound = *lpat++; \
} else { \
hi_bound = lo_bound; \
} \
if (lo_bound <= lc && lc <= hi_bound) { \
ok = !ok; \
goto out; \
} \
} \
out: \
found = ok; \
}
/*
 * opatmatch - the old external interpreter interface.
 *
 * Tries to match a string beginning at offset 'soff' against the
 * compiled pattern.
 * The list of active states that is needed by the interpreter is kept
 * in a local array of MAXPAT entries, the work is done by patmatch().
 * This interface exists only if neither __WIDE_CHAR nor __MB_CHAR is
 * defined.
 */
#if	!defined(__WIDE_CHAR) && !defined(__MB_CHAR)
EXPORT CHAR *
opatmatch(pat, aux, str, soff, slen, alt)
	const	PCHAR	*pat;
	const	int	*aux;
	const	CHAR	*str;
	int	soff;
	int	slen;
	int	alt;
{
	int	active[MAXPAT];
	CHAR	*end;

	end = patmatch(pat, aux, str, soff, slen, alt, active);
	return (end);
}
#endif
/*
 * patmatch - the external interpreter interface.
 *
 * Tries to match a string beginning at offset 'soff'
 * against the compiled pattern.
 *
 *	pat	the pattern
 *	aux	the array with the state links that belongs to 'pat'
 *	str	the string to match
 *	soff	the offset in 'str' where the match is started
 *	slen	the length of 'str'
 *	alt	the start state of the outermost alternative or ENDSTATE
 *	state	the space for the list of active states
 *
 * Returns a pointer to the first character behind the longest match or
 * NULL if the pattern does not match.
 *
 * With __LINE_MATCH, the match is retried with every offset from 'soff'
 * up to 'slen' until a match has been found.
 * With __MB_CHAR, the string is a multi byte string that is converted
 * character by character using mbtowc(); a byte sequence that cannot be
 * converted is taken as a single byte.
 */
EXPORT CHAR *
patmatch(pat, aux, str, soff, slen, alt, state)
	const	PCHAR	*pat;
	const	int	*aux;
	const	CHAR	*str;
	int	soff;
	int	slen;
	int	alt;
	int	state[];
{
	register int	*sp;
	register int	*n;
	register int	*i;
	register int	p;
	register int	q, s, k;
#ifdef	__MB_CHAR
	wchar_t	c;
	int	mlen = 1;
#else
	int	c;
#endif
	const CHAR	*lastp = (CHAR *)NULL;

#ifdef	__LINE_MATCH
for (; soff <= slen; soff++) {
#endif

	/*
	 * The list of active states starts with state 0 and with the
	 * start of the outermost alternative if there is one.
	 */
	sp = state;
	put(sp, state, state, 0);
	if (alt != ENDSTATE)
		put(sp, state, sp, alt);

#ifdef	__MB_CHAR
	mbtowc(NULL, NULL, 0);		/* Reset the conversion state */
	for (s = soff; ; s += mlen) {
#else
	for (s = soff; ; s++) {
#endif
		/*
		 * next char from input string
		 */
		if (s >= slen) {
			c = 0;
		} else {
#ifdef	__MB_CHAR
			/*
			 * Convert the character at the current offset.
			 * The conversion has to start at &str[s], with
			 * "str" we would see the first character of the
			 * string again and again.
			 */
			mlen = mbtowc(&c, (char *)&str[s], slen - s);
			if (mlen < 0) {
				/*
				 * Illegal sequence: reset the conversion
				 * state and use the plain byte instead.
				 */
				mbtowc(NULL, NULL, 0);
				c = str[s];
				mlen = 1;
			}
#else
			c = str[s];
#endif
		}
		/*
		 * first complete the closure
		 *
		 * States that are added by put() are appended to the
		 * list and thus are visited by this loop as well.
		 */
		for (n = state; n < sp; ) {
			p = *n++;		/* next state number */
			if (p == ENDSTATE)
				continue;
			q = aux[p];		/* get next state for pat */
			k = pat[p];		/* get next char from pat */
			switch (k) {

			case REP:
				put(sp, state, sp, p+1);
				/* FALLTHRU */
			case NIL:		/* NIL matches always */
			case STAR:
				put(sp, state, sp, q);
				break;
			case LBRACK:		/* alternations */
			case ALT:
				put(sp, state, sp, p+1);
				if (q != ENDSTATE)
					put(sp, state, sp, q);
				break;
			case START:
				if (s == 0)
					put(sp, state, sp, q);
				break;
			case END:
				if (c == '\0')
					put(sp, state, sp, q);
				break;
			}
		}

		/*
		 * If the final state is active, the pattern matches
		 * everything up to the current position.
		 */
		for (i = state; i < sp; ) {
			if (*i++ == ENDSTATE) {
				lastp = &str[s];
				break;
			}
		}
		if (c == 0)
			return ((CHAR *)lastp);

		/*
		 * now try to match next character
		 *
		 * The new list is set up in place: every active state
		 * that is read adds at most one new state, so the write
		 * pointer sp never passes the read pointer i.
		 */
		n = sp;
		sp = state;
		for (i = sp; i < n; ) {
			p = *i++;		/* next active state number */
			if (p == ENDSTATE)
				continue;
			k = pat[p];
			switch (k) {

			case ALT:
			case REP:
			case NIL:
			case LBRACK:
			case START:
			case END:
				continue;
			case LCLASS:
				in_class(q, &pat[p+1], c);
				if (!q)
					continue;
				break;
			case STAR:
				put(sp, state, sp, p);
				continue;
			case QUOTE:
				k = pat[p+1];
				/* FALLTHRU */
			default:
				if (k != c)
					continue;
				/* FALLTHRU */
			case ANY:
				break;
			}
			put(sp, state, sp, aux[p]);
		}
		if (sp == state) {		/* if no new states return */
#ifdef	__LINE_MATCH
			if (lastp || (soff == slen - 1))
				return ((CHAR *)lastp);
			else
				break;
#else
			return ((CHAR *)lastp);
#endif
		}
	}
#ifdef	__LINE_MATCH
}
	return ((CHAR *)lastp);
#endif
}
#if !defined(__LINE_MATCH) && !defined(__MB_CHAR)
/*
* The Compiler
*/
/*
 * The state of the pattern compiler, it is passed to all parser functions.
 */
typedef struct args {
const PCHAR *pattern; /* The pattern to compile */
int *aux; /* The array that takes the state links */
int patp; /* Index of current char in pattern */
int length; /* Number of characters in pattern */
PCHAR Ch; /* Current char, 0 behind end of pattern */
} arg_t;
/*
 * Prototypes for the local functions of the pattern compiler.
 */
LOCAL void nextitem __PR((arg_t *));
LOCAL int prim __PR((arg_t *));
LOCAL int expr __PR((arg_t *, int *));
LOCAL void setexits __PR((int *, int, int));
LOCAL int join __PR((int *, int, int));
/*
 * 'read' the next character from pattern
 *
 * The read position is advanced by one and the character found there
 * becomes the current character. Behind the end of the pattern, the
 * current character is set to 0.
 */
#define	rch(ap) \
{ \
	(ap)->patp++; \
	(ap)->Ch = ((ap)->patp < (ap)->length) ? \
			(ap)->pattern[(ap)->patp] : 0; \
}
/*
 * 'peek' the next character from pattern
 *
 * Yields the character behind the current one without moving the read
 * position, or 0 if this would be behind the end of the pattern.
 *
 * The last line of the definition must not end with a backslash, as
 * this would make the line that follows the macro a part of it.
 */
#define	pch(ap) \
	((((ap)->patp + 1) >= (ap)->length) ? \
		0 \
	: \
		(ap)->pattern[(ap)->patp+1])
/*
 * get the next item from pattern
 *
 * An item is a single character or a QUOTE together with the character
 * that follows it: if the current character is a QUOTE, two characters
 * are read, else one.
 */
LOCAL void
nextitem(ap)
	arg_t	*ap;
{
	int	quoted = (ap->Ch == QUOTE);

	rch(ap);
	if (quoted)
		rch(ap);
}
/*
 * parse a primary
 *
 * A primary is a single item, a character class, a REP followed by
 * a primary or an expression that is enclosed by LBRACK and RBRACK.
 *
 * Returns ENDSTATE if the current item is the end of the pattern, an ALT
 * or a RBRACK and if a syntax error has been found. Else the pattern
 * index where the primary starts is returned, for an expression in
 * brackets this is the value that has been returned by expr().
 * The caller uses the return value as a list of exits that is linked
 * via the aux array.
 */
LOCAL int
prim(ap)
	arg_t	*ap;
{
	int	a  = ap->patp;
	int	op = ap->Ch;
	int	t;

	nextitem(ap);
	switch (op) {

	case '\0':
	case ALT:
	case RBRACK:
		return (ENDSTATE);
	case LCLASS:
		while (ap->Ch != RCLASS && ap->Ch != '\0') {
#ifdef	POSIX_CLASS
			if (ap->Ch == LCLASS) {
				if (pch(ap) == ':') {	/* [:alpha:] */
					char	class[CL_SIZE+1];
					char	*pc = class;

					nextitem(ap);	/* Skip LCLASS */
					nextitem(ap);	/* Skip ':' */
					while (ap->Ch != ':' &&
					    ap->Ch != '\0') {
						/*
						 * Keep the last byte for
						 * the null byte that is
						 * added behind the loop.
						 * This is the same limit
						 * that is used by the
						 * interpreter.
						 */
						if (pc >= &class[CL_SIZE])
							return (ENDSTATE);
						/*
						 * Fail if the character
						 * does not fit into a char.
						 */
						*pc = ap->Ch;
						if (*pc++ != ap->Ch)
							return (ENDSTATE);
						nextitem(ap);
					}
					if (pc == class)	/* Empty name */
						return (ENDSTATE);
					*pc = '\0';
					if (ap->Ch == '\0')
						return (ENDSTATE);
					if (wctype(class) == 0)	/* Unknown */
						return (ENDSTATE);
					nextitem(ap);	/* Skip ':' */
				}
				/*
				 * This catches a name that is not followed
				 * by ":]" as well as a LCLASS that is not
				 * followed by ':'.
				 */
				if (ap->Ch != RCLASS)
					return (ENDSTATE);
			}
#endif
			nextitem(ap);
		}
		if (ap->Ch == '\0')		/* Missing RCLASS */
			return (ENDSTATE);
		nextitem(ap);
		break;
	case REP:
		t = prim(ap);
		if (t == ENDSTATE)
			return (ENDSTATE);
		/*
		 * The exits of the repeated primary lead back to the REP.
		 */
		setexits(ap->aux, t, a);
		break;
	case LBRACK:
		a = expr(ap, &ap->aux[a]);
		if (a == ENDSTATE || ap->Ch != RBRACK)
			return (ENDSTATE);
		nextitem(ap);
		break;
	}
	return (a);
}
/*
 * parse an expression (a sequence of primaries)
 *
 * The exits of a primary that is followed by another primary are set to
 * the start of the follower. At an ALT, a RBRACK and at the end of the
 * pattern, the exits of the last primary are added to the list of open
 * exits. An ALT in addition stores its pattern index via 'altp' and
 * makes 'altp' point to its own aux entry before the next alternative
 * is parsed.
 *
 * Returns the list of open exits or ENDSTATE if prim() returned ENDSTATE.
 */
LOCAL int
expr(ap, altp)
	arg_t	*ap;
	int	*altp;
{
	int	*links = ap->aux;
	int	open = ENDSTATE;
	int	item;
	PCHAR	next;

	for (;;) {
		item = prim(ap);
		if (item == ENDSTATE)
			return (ENDSTATE);

		next = ap->Ch;
		if (next != ALT && next != RBRACK && next != '\0') {
			/*
			 * Another primary follows.
			 */
			setexits(links, item, ap->patp);
			continue;
		}

		open = join(links, open, item);
		if (next != ALT)
			return (open);

		*altp = ap->patp;
		altp = &links[ap->patp];
		nextitem(ap);
	}
}
/*
 * set all exits in a list to a specified value
 *
 * The list starts at index 'list' and is linked via the entries of
 * 'aux' until ENDSTATE is found. Each entry of the list is replaced
 * by 'val'.
 */
LOCAL void
setexits(aux, list, val)
	int	*aux;
	int	list;
	int	val;
{
	int	next;

	for (; list != ENDSTATE; list = next) {
		next = aux[list];	/* Fetch link before it is lost */
		aux[list] = val;
	}
}
/*
 * concatenate two lists
 *
 * The list 'b' is appended to the last entry of list 'a', both lists
 * are linked via 'aux'. Returns the start of the resulting list, this
 * is 'b' if 'a' is empty.
 */
LOCAL int
join(aux, a, b)
	int	*aux;
	int	a;
	int	b;
{
	int	tail;

	if (a == ENDSTATE)
		return (b);

	for (tail = a; aux[tail] != ENDSTATE; tail = aux[tail])
		;
	aux[tail] = b;
	return (a);
}
/*
 * patcompile - the external compiler interface.
 *
 * The pattern 'pat' with 'len' characters is compiled into the aux array,
 * the first 'len' entries of 'aux' are initialized to ENDSTATE before.
 * Return value on success, is the outermost alternate which is != 0.
 * Error is indicated by return of 0.
 */
EXPORT int
patcompile(pat, len, aux)
	const	PCHAR	*pat;
	int	len;
	int	*aux;
{
	arg_t	comp;
	int	outer_alt = ENDSTATE;
	int	exits;
	int	idx;

	for (idx = 0; idx < len; idx++)
		aux[idx] = ENDSTATE;

	comp.pattern = pat;
	comp.length = len;
	comp.aux = aux;
	comp.patp = -1;
	rch(&comp);			/* Fetch the first character */

	exits = expr(&comp, &outer_alt);
	if (exits == ENDSTATE)
		return (0);

	/*
	 * The exits that are still open lead to the final state.
	 */
	setexits(aux, exits, ENDSTATE);
	return (outer_alt);
}
#endif /* !defined(__LINE_MATCH) && !defined(__MB_CHAR) */