New libregexp and APE ported to native
This commit is contained in:
parent
651d6c2bc6
commit
0a460e1722
29 changed files with 1861 additions and 1677 deletions
|
@ -1,15 +1,29 @@
|
|||
#pragma src "/sys/src/libregexp"
|
||||
#pragma lib "libregexp.a"
|
||||
#pragma src "/sys/src/libregexp"
|
||||
#pragma lib "libregexp.a"
|
||||
enum
|
||||
{
|
||||
OANY = 0,
|
||||
OBOL,
|
||||
OCLASS,
|
||||
OEOL,
|
||||
OJMP,
|
||||
ONOTNL,
|
||||
ORUNE,
|
||||
OSAVE,
|
||||
OSPLIT,
|
||||
OUNSAVE,
|
||||
};
|
||||
|
||||
typedef struct Resub Resub;
|
||||
typedef struct Reclass Reclass;
|
||||
typedef struct Reinst Reinst;
|
||||
typedef struct Reprog Reprog;
|
||||
typedef struct Resub Resub;
|
||||
typedef struct Reinst Reinst;
|
||||
typedef struct Reprog Reprog;
|
||||
typedef struct Rethread Rethread;
|
||||
|
||||
/*
|
||||
* Sub expression matches
|
||||
*/
|
||||
struct Resub{
|
||||
#pragma incomplete Reinst
|
||||
#pragma incomplete Rethread
|
||||
|
||||
struct Resub
|
||||
{
|
||||
union
|
||||
{
|
||||
char *sp;
|
||||
|
@ -21,46 +35,22 @@ struct Resub{
|
|||
Rune *rep;
|
||||
};
|
||||
};
|
||||
|
||||
/*
|
||||
* character class, each pair of rune's defines a range
|
||||
*/
|
||||
struct Reclass{
|
||||
Rune *end;
|
||||
Rune spans[64];
|
||||
struct Reprog
|
||||
{
|
||||
Reinst *startinst;
|
||||
Rethread *threads;
|
||||
Rethread **thrpool;
|
||||
char *regstr;
|
||||
int len;
|
||||
int nthr;
|
||||
};
|
||||
|
||||
/*
|
||||
* Machine instructions
|
||||
*/
|
||||
struct Reinst{
|
||||
int type;
|
||||
union {
|
||||
Reclass *cp; /* class pointer */
|
||||
Rune r; /* character */
|
||||
int subid; /* sub-expression id for RBRA and LBRA */
|
||||
Reinst *right; /* right child of OR */
|
||||
};
|
||||
union { /* regexp relies on these two being in the same union */
|
||||
Reinst *left; /* left child of OR */
|
||||
Reinst *next; /* next instruction for CAT & LBRA */
|
||||
};
|
||||
};
|
||||
|
||||
/*
|
||||
* Reprogram definition
|
||||
*/
|
||||
struct Reprog{
|
||||
Reinst *startinst; /* start pc */
|
||||
Reclass class[16]; /* .data */
|
||||
Reinst firstinst[5]; /* .text */
|
||||
};
|
||||
|
||||
extern Reprog *regcomp(char*);
|
||||
extern Reprog *regcomplit(char*);
|
||||
extern Reprog *regcompnl(char*);
|
||||
extern void regerror(char*);
|
||||
extern int regexec(Reprog*, char*, Resub*, int);
|
||||
extern void regsub(char*, char*, int, Resub*, int);
|
||||
extern int rregexec(Reprog*, Rune*, Resub*, int);
|
||||
extern void rregsub(Rune*, Rune*, int, Resub*, int);
|
||||
Reprog* regcomp(char*);
|
||||
Reprog* regcomplit(char*);
|
||||
Reprog* regcompnl(char*);
|
||||
void regerror(char*);
|
||||
int regexec(Reprog*, char*, Resub*, int);
|
||||
void regsub(char*, char*, int, Resub*, int);
|
||||
int rregexec(Reprog*, Rune*, Resub*, int);
|
||||
void rregsub(Rune*, Rune*, int, Resub*, int);
|
||||
int reprogfmt(Fmt *);
|
||||
|
|
|
@ -6,20 +6,20 @@ Copyright (c) Lucent Technologies 1997
|
|||
|
||||
typedef double Awkfloat;
|
||||
|
||||
/* unsigned char is more trouble than it's worth */
|
||||
|
||||
typedef unsigned char uschar;
|
||||
|
||||
#define xfree(a) { if ((a) != NULL) { free((char *) a); a = NULL; } }
|
||||
#define xfree(a) { if ((a) != nil) { free((a)); (a) = nil; } }
|
||||
|
||||
#define DEBUG
|
||||
#ifdef DEBUG
|
||||
/* uses have to be doubly parenthesized */
|
||||
# define dprintf(x) if (dbg) printf x
|
||||
# define dprint(x) if (dbg) print x
|
||||
#else
|
||||
# define dprintf(x)
|
||||
# define dprint(x)
|
||||
#endif
|
||||
|
||||
#define FOPEN_MAX 40 /* max number of open files */
|
||||
|
||||
#define EOF -1
|
||||
|
||||
extern char errbuf[];
|
||||
|
||||
extern int compile_time; /* 1 if compiling, 0 if running */
|
||||
|
@ -28,6 +28,10 @@ extern int safe; /* 0 => unsafe, 1 => safe */
|
|||
#define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */
|
||||
extern int recsize; /* size of current record, orig RECSIZE */
|
||||
|
||||
extern Biobuf stdin;
|
||||
extern Biobuf stdout;
|
||||
extern Biobuf stderr;
|
||||
|
||||
extern char **FS;
|
||||
extern char **RS;
|
||||
extern char **ORS;
|
||||
|
@ -56,8 +60,8 @@ extern int patlen; /* length of pattern matched. set in b.c */
|
|||
/* Cell: all information about a variable or constant */
|
||||
|
||||
typedef struct Cell {
|
||||
uschar ctype; /* OCELL, OBOOL, OJUMP, etc. */
|
||||
uschar csub; /* CCON, CTEMP, CFLD, etc. */
|
||||
uchar ctype; /* OCELL, OBOOL, OJUMP, etc. */
|
||||
uchar csub; /* CCON, CTEMP, CFLD, etc. */
|
||||
char *nval; /* name, for variables only */
|
||||
char *sval; /* string value */
|
||||
Awkfloat fval; /* value as number */
|
||||
|
@ -66,7 +70,7 @@ typedef struct Cell {
|
|||
} Cell;
|
||||
|
||||
typedef struct Array { /* symbol table array */
|
||||
int nelem; /* elements in table right now */
|
||||
int nelemt; /* elements in table right now */
|
||||
int size; /* size of tab */
|
||||
Cell **tab; /* hash table pointers */
|
||||
} Array;
|
||||
|
|
|
@ -23,8 +23,9 @@ THIS SOFTWARE.
|
|||
****************************************************************/
|
||||
|
||||
%{
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include "awk.h"
|
||||
|
||||
#define makedfa(a,b) compre(a)
|
||||
|
|
|
@ -22,10 +22,10 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
|||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <ctype.h>
|
||||
#include <bio.h>
|
||||
#include "awk.h"
|
||||
#include "y.tab.h"
|
||||
|
||||
|
@ -90,9 +90,8 @@ Keyword keywords[] ={ /* keep sorted: binary searched */
|
|||
{ "while", WHILE, WHILE },
|
||||
};
|
||||
|
||||
#define DEBUG
|
||||
#ifdef DEBUG
|
||||
#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
|
||||
#define RET(x) { if(dbg)print("lex %s\n", tokname(x)); return(x); }
|
||||
#else
|
||||
#define RET(x) return(x)
|
||||
#endif
|
||||
|
@ -170,7 +169,7 @@ int yylex(void)
|
|||
static char *buf = 0;
|
||||
static int bufsize = 500;
|
||||
|
||||
if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
|
||||
if (buf == 0 && (buf = (char *) malloc(bufsize)) == nil)
|
||||
FATAL( "out of space in yylex" );
|
||||
if (sc) {
|
||||
sc = 0;
|
||||
|
@ -353,7 +352,7 @@ int string(void)
|
|||
static char *buf = 0;
|
||||
static int bufsz = 500;
|
||||
|
||||
if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
|
||||
if (buf == 0 && (buf = (char *) malloc(bufsz)) == nil)
|
||||
FATAL("out of space for strings");
|
||||
for (bp = buf; (c = input()) != '"'; ) {
|
||||
if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0))
|
||||
|
@ -401,7 +400,7 @@ int string(void)
|
|||
}
|
||||
*px = 0;
|
||||
unput(c);
|
||||
sscanf(xbuf, "%x", &n);
|
||||
n = strtol(xbuf, nil, 16);
|
||||
*bp++ = n;
|
||||
break;
|
||||
}
|
||||
|
@ -497,7 +496,7 @@ int regexpr(void)
|
|||
static int bufsz = 500;
|
||||
char *bp;
|
||||
|
||||
if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
|
||||
if (buf == 0 && (buf = (char *) malloc(bufsz)) == nil)
|
||||
FATAL("out of space for rex expr");
|
||||
bp = buf;
|
||||
for ( ; (c = input()) != '/' && c != 0; ) {
|
||||
|
@ -526,7 +525,7 @@ char ebuf[300];
|
|||
char *ep = ebuf;
|
||||
char yysbuf[100]; /* pushback buffer */
|
||||
char *yysptr = yysbuf;
|
||||
FILE *yyin = 0;
|
||||
Biobuf *yyin;
|
||||
|
||||
int input(void) /* get next lexical input character */
|
||||
{
|
||||
|
@ -535,7 +534,7 @@ int input(void) /* get next lexical input character */
|
|||
|
||||
if (yysptr > yysbuf)
|
||||
c = *--yysptr;
|
||||
else if (lexprog != NULL) { /* awk '...' */
|
||||
else if (lexprog != nil) { /* awk '...' */
|
||||
if ((c = *lexprog) != 0)
|
||||
lexprog++;
|
||||
} else /* awk -f ... */
|
||||
|
|
|
@ -22,17 +22,14 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
|||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
#define DEBUG
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <bio.h>
|
||||
#include "awk.h"
|
||||
#include "y.tab.h"
|
||||
|
||||
FILE *infile = NULL;
|
||||
Biobuf *infile;
|
||||
char *file = "";
|
||||
char *record;
|
||||
int recsize = RECSIZE;
|
||||
|
@ -50,17 +47,17 @@ int donerec; /* 1 = record is valid (no flds have changed) */
|
|||
|
||||
int lastfld = 0; /* last used field */
|
||||
int argno = 1; /* current input argument number */
|
||||
extern Awkfloat *ARGC;
|
||||
extern Awkfloat *AARGC;
|
||||
|
||||
static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE };
|
||||
static Cell dollar1 = { OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE };
|
||||
static Cell dollar0 = { OCELL, CFLD, nil, "", 0.0, REC|STR|DONTFREE };
|
||||
static Cell dollar1 = { OCELL, CFLD, nil, "", 0.0, FLD|STR|DONTFREE };
|
||||
|
||||
void recinit(unsigned int n)
|
||||
{
|
||||
record = (char *) malloc(n);
|
||||
fields = (char *) malloc(n);
|
||||
fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *));
|
||||
if (record == NULL || fields == NULL || fldtab == NULL)
|
||||
if (record == nil || fields == nil || fldtab == nil)
|
||||
FATAL("out of space for $0 and fields");
|
||||
fldtab[0] = (Cell *) malloc(sizeof (Cell));
|
||||
*fldtab[0] = dollar0;
|
||||
|
@ -76,10 +73,10 @@ void makefields(int n1, int n2) /* create $n1..$n2 inclusive */
|
|||
|
||||
for (i = n1; i <= n2; i++) {
|
||||
fldtab[i] = (Cell *) malloc(sizeof (struct Cell));
|
||||
if (fldtab[i] == NULL)
|
||||
if (fldtab[i] == nil)
|
||||
FATAL("out of space in makefields %d", i);
|
||||
*fldtab[i] = dollar1;
|
||||
sprintf(temp, "%d", i);
|
||||
sprint(temp, "%d", i);
|
||||
fldtab[i]->nval = tostring(temp);
|
||||
}
|
||||
}
|
||||
|
@ -89,7 +86,7 @@ void initgetrec(void)
|
|||
int i;
|
||||
char *p;
|
||||
|
||||
for (i = 1; i < *ARGC; i++) {
|
||||
for (i = 1; i < *AARGC; i++) {
|
||||
if (!isclvar(p = getargv(i))) { /* find 1st real filename */
|
||||
setsval(lookup("FILENAME", symtab), getargv(i));
|
||||
return;
|
||||
|
@ -97,7 +94,7 @@ void initgetrec(void)
|
|||
setclvar(p); /* a commandline assignment before filename */
|
||||
argno++;
|
||||
}
|
||||
infile = stdin; /* no filenames, so use stdin */
|
||||
infile = &stdin; /* no filenames, so use &stdin */
|
||||
}
|
||||
|
||||
int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
|
||||
|
@ -111,16 +108,16 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
|
|||
firsttime = 0;
|
||||
initgetrec();
|
||||
}
|
||||
dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
|
||||
*RS, *FS, *ARGC, *FILENAME) );
|
||||
dprint( ("RS=<%s>, FS=<%s>, AARGC=%g, FILENAME=%s\n",
|
||||
*RS, *FS, *AARGC, *FILENAME) );
|
||||
if (isrecord) {
|
||||
donefld = 0;
|
||||
donerec = 1;
|
||||
}
|
||||
buf[0] = 0;
|
||||
while (argno < *ARGC || infile == stdin) {
|
||||
dprintf( ("argno=%d, file=|%s|\n", argno, file) );
|
||||
if (infile == NULL) { /* have to open a new file */
|
||||
while (argno < *AARGC || infile == &stdin) {
|
||||
dprint( ("argno=%d, file=|%s|\n", argno, file) );
|
||||
if (infile == nil) { /* have to open a new file */
|
||||
file = getargv(argno);
|
||||
if (*file == '\0') { /* it's been zapped */
|
||||
argno++;
|
||||
|
@ -132,10 +129,10 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
|
|||
continue;
|
||||
}
|
||||
*FILENAME = file;
|
||||
dprintf( ("opening file %s\n", file) );
|
||||
dprint( ("opening file %s\n", file) );
|
||||
if (*file == '-' && *(file+1) == '\0')
|
||||
infile = stdin;
|
||||
else if ((infile = fopen(file, "r")) == NULL)
|
||||
infile = &stdin;
|
||||
else if ((infile = Bopen(file, OREAD)) == nil)
|
||||
FATAL("can't open file %s", file);
|
||||
setfval(fnrloc, 0.0);
|
||||
}
|
||||
|
@ -158,9 +155,9 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
|
|||
return 1;
|
||||
}
|
||||
/* EOF arrived on this file; set up next */
|
||||
if (infile != stdin)
|
||||
fclose(infile);
|
||||
infile = NULL;
|
||||
if (infile != &stdin)
|
||||
Bterm(infile);
|
||||
infile = nil;
|
||||
argno++;
|
||||
}
|
||||
*pbuf = buf;
|
||||
|
@ -170,13 +167,13 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
|
|||
|
||||
void nextfile(void)
|
||||
{
|
||||
if (infile != stdin)
|
||||
fclose(infile);
|
||||
infile = NULL;
|
||||
if (infile != &stdin)
|
||||
Bterm(infile);
|
||||
infile = nil;
|
||||
argno++;
|
||||
}
|
||||
|
||||
int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */
|
||||
int readrec(char **pbuf, int *pbufsize, Biobuf *inf) /* read one record into buf */
|
||||
{
|
||||
int sep, c;
|
||||
char *rr, *buf = *pbuf;
|
||||
|
@ -187,13 +184,13 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
|
|||
strcpy(inputFS, *FS); /* for subsequent field splitting */
|
||||
if ((sep = **RS) == 0) {
|
||||
sep = '\n';
|
||||
while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
|
||||
while ((c=Bgetc(inf)) == '\n' && c != EOF) /* skip leading \n's */
|
||||
;
|
||||
if (c != EOF)
|
||||
ungetc(c, inf);
|
||||
Bungetc(inf);
|
||||
}
|
||||
for (rr = buf; ; ) {
|
||||
for (; (c=getc(inf)) != sep && c != EOF; ) {
|
||||
for (; (c=Bgetc(inf)) != sep && c != EOF; ) {
|
||||
if (rr-buf+1 > bufsize)
|
||||
if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1"))
|
||||
FATAL("input record `%.30s...' too long", buf);
|
||||
|
@ -201,7 +198,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
|
|||
}
|
||||
if (**RS == sep || c == EOF)
|
||||
break;
|
||||
if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
|
||||
if ((c = Bgetc(inf)) == '\n' || c == EOF) /* 2 in a row */
|
||||
break;
|
||||
if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2"))
|
||||
FATAL("input record `%.30s...' too long", buf);
|
||||
|
@ -211,7 +208,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
|
|||
if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
|
||||
FATAL("input record `%.30s...' too long", buf);
|
||||
*rr = 0;
|
||||
dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
|
||||
dprint( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
|
||||
*pbuf = buf;
|
||||
*pbufsize = bufsize;
|
||||
return c == EOF && rr == buf ? 0 : 1;
|
||||
|
@ -223,10 +220,10 @@ char *getargv(int n) /* get ARGV[n] */
|
|||
char *s, temp[50];
|
||||
extern Array *ARGVtab;
|
||||
|
||||
sprintf(temp, "%d", n);
|
||||
sprint(temp, "%d", n);
|
||||
x = setsymtab(temp, "", 0.0, STR, ARGVtab);
|
||||
s = getsval(x);
|
||||
dprintf( ("getargv(%d) returns |%s|\n", n, s) );
|
||||
dprint( ("getargv(%d) returns |%s|\n", n, s) );
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -245,7 +242,7 @@ void setclvar(char *s) /* set var=value from s */
|
|||
q->fval = atof(q->sval);
|
||||
q->tval |= NUM;
|
||||
}
|
||||
dprintf( ("command line set %s to |%s|\n", s, p) );
|
||||
dprint( ("command line set %s to |%s|\n", s, p) );
|
||||
}
|
||||
|
||||
|
||||
|
@ -265,7 +262,7 @@ void fldbld(void) /* create fields from current record */
|
|||
n = strlen(r);
|
||||
if (n > fieldssize) {
|
||||
xfree(fields);
|
||||
if ((fields = (char *) malloc(n+1)) == NULL)
|
||||
if ((fields = (char *) malloc(n+1)) == nil)
|
||||
FATAL("out of space for fields in fldbld %d", n);
|
||||
fieldssize = n;
|
||||
}
|
||||
|
@ -273,7 +270,7 @@ void fldbld(void) /* create fields from current record */
|
|||
i = 0; /* number of fields accumulated here */
|
||||
if (strlen(inputFS) > 1) { /* it's a regular expression */
|
||||
i = refldbld(r, inputFS);
|
||||
} else if ((sep = *inputFS) == ' ') { /* default whitespace */
|
||||
} else if (*inputFS == ' ') { /* default whitespace */
|
||||
for (i = 0; ; ) {
|
||||
while (*r == ' ' || *r == '\t' || *r == '\n')
|
||||
r++;
|
||||
|
@ -339,7 +336,7 @@ void fldbld(void) /* create fields from current record */
|
|||
if (dbg) {
|
||||
for (j = 0; j <= lastfld; j++) {
|
||||
p = fldtab[j];
|
||||
printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
|
||||
print("field %d (%s): |%s|\n", j, p->nval, p->sval);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -383,7 +380,7 @@ void growfldtab(int n) /* make new fields up to at least $n */
|
|||
if (n > nf)
|
||||
nf = n;
|
||||
fldtab = (Cell **) realloc(fldtab, (nf+1) * (sizeof (struct Cell *)));
|
||||
if (fldtab == NULL)
|
||||
if (fldtab == nil)
|
||||
FATAL("out of space creating %d fields", nf);
|
||||
makefields(nfields+1, nf);
|
||||
nfields = nf;
|
||||
|
@ -395,12 +392,12 @@ int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */
|
|||
/* the fields are all stored in this one array with \0's */
|
||||
char *fr;
|
||||
void *p;
|
||||
int i, tempstat, n;
|
||||
int i, n;
|
||||
|
||||
n = strlen(rec);
|
||||
if (n > fieldssize) {
|
||||
xfree(fields);
|
||||
if ((fields = (char *) malloc(n+1)) == NULL)
|
||||
if ((fields = (char *) malloc(n+1)) == nil)
|
||||
FATAL("out of space for fields in refldbld %d", n);
|
||||
fieldssize = n;
|
||||
}
|
||||
|
@ -409,7 +406,7 @@ int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */
|
|||
if (*rec == '\0')
|
||||
return 0;
|
||||
p = compre(fs);
|
||||
dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
|
||||
dprint( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
|
||||
for (i = 1; ; i++) {
|
||||
if (i > nfields)
|
||||
growfldtab(i);
|
||||
|
@ -417,15 +414,15 @@ int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */
|
|||
xfree(fldtab[i]->sval);
|
||||
fldtab[i]->tval = FLD | STR | DONTFREE;
|
||||
fldtab[i]->sval = fr;
|
||||
dprintf( ("refldbld: i=%d\n", i) );
|
||||
dprint( ("refldbld: i=%d\n", i) );
|
||||
if (nematch(p, rec, rec)) {
|
||||
dprintf( ("match %s (%d chars)\n", patbeg, patlen) );
|
||||
dprint( ("match %s (%d chars)\n", patbeg, patlen) );
|
||||
strncpy(fr, rec, patbeg-rec);
|
||||
fr += patbeg - rec + 1;
|
||||
*(fr-1) = '\0';
|
||||
rec = patbeg + patlen;
|
||||
} else {
|
||||
dprintf( ("no match %s\n", rec) );
|
||||
dprint( ("no match %s\n", rec) );
|
||||
strcpy(fr, rec);
|
||||
break;
|
||||
}
|
||||
|
@ -457,15 +454,15 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
|
|||
if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
|
||||
FATAL("built giant record `%.30s...'", record);
|
||||
*r = '\0';
|
||||
dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
|
||||
dprint( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
|
||||
|
||||
if (freeable(fldtab[0]))
|
||||
xfree(fldtab[0]->sval);
|
||||
fldtab[0]->tval = REC | STR | DONTFREE;
|
||||
fldtab[0]->sval = record;
|
||||
|
||||
dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
|
||||
dprintf( ("recbld = |%s|\n", record) );
|
||||
dprint( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
|
||||
dprint( ("recbld = |%s|\n", record) );
|
||||
donerec = 1;
|
||||
}
|
||||
|
||||
|
@ -484,24 +481,26 @@ void SYNTAX(char *fmt, ...)
|
|||
|
||||
if (been_here++ > 2)
|
||||
return;
|
||||
fprintf(stderr, "%s: ", cmdname);
|
||||
Bprint(&stderr, "%s: ", cmdname);
|
||||
va_start(varg, fmt);
|
||||
vfprintf(stderr, fmt, varg);
|
||||
Bvprint(&stderr, fmt, varg);
|
||||
va_end(varg);
|
||||
if(compile_time == 1 && cursource() != NULL)
|
||||
fprintf(stderr, " at %s:%d", cursource(), lineno);
|
||||
if(compile_time == 1 && cursource() != nil)
|
||||
Bprint(&stderr, " at %s:%d", cursource(), lineno);
|
||||
else
|
||||
fprintf(stderr, " at line %d", lineno);
|
||||
if (curfname != NULL)
|
||||
fprintf(stderr, " in function %s", curfname);
|
||||
fprintf(stderr, "\n");
|
||||
Bprint(&stderr, " at line %d", lineno);
|
||||
if (curfname != nil)
|
||||
Bprint(&stderr, " in function %s", curfname);
|
||||
Bprint(&stderr, "\n");
|
||||
errorflag = 2;
|
||||
eprint();
|
||||
}
|
||||
|
||||
void fpecatch(int n)
|
||||
int handler(void *, char *err)
|
||||
{
|
||||
FATAL("floating point exception %d", n);
|
||||
Bflush(&stdout);
|
||||
fprint(2, "%s\n", err);
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int bracecnt, brackcnt, parencnt;
|
||||
|
@ -520,16 +519,16 @@ void bracecheck(void)
|
|||
bcheck2(parencnt, '(', ')');
|
||||
}
|
||||
|
||||
void bcheck2(int n, int c1, int c2)
|
||||
void bcheck2(int n, int, int c2)
|
||||
{
|
||||
if (n == 1)
|
||||
fprintf(stderr, "\tmissing %c\n", c2);
|
||||
Bprint(&stderr, "\tmissing %c\n", c2);
|
||||
else if (n > 1)
|
||||
fprintf(stderr, "\t%d missing %c's\n", n, c2);
|
||||
Bprint(&stderr, "\t%d missing %c's\n", n, c2);
|
||||
else if (n == -1)
|
||||
fprintf(stderr, "\textra %c\n", c2);
|
||||
Bprint(&stderr, "\textra %c\n", c2);
|
||||
else if (n < -1)
|
||||
fprintf(stderr, "\t%d extra %c's\n", -n, c2);
|
||||
Bprint(&stderr, "\t%d extra %c's\n", -n, c2);
|
||||
}
|
||||
|
||||
void FATAL(char *fmt, ...)
|
||||
|
@ -537,15 +536,15 @@ void FATAL(char *fmt, ...)
|
|||
extern char *cmdname;
|
||||
va_list varg;
|
||||
|
||||
fflush(stdout);
|
||||
fprintf(stderr, "%s: ", cmdname);
|
||||
Bflush(&stdout);
|
||||
Bprint(&stderr, "%s: ", cmdname);
|
||||
va_start(varg, fmt);
|
||||
vfprintf(stderr, fmt, varg);
|
||||
Bvprint(&stderr, fmt, varg);
|
||||
va_end(varg);
|
||||
error();
|
||||
if (dbg > 1) /* core dump if serious debugging on */
|
||||
abort();
|
||||
exit(2);
|
||||
exits("FATAL");
|
||||
}
|
||||
|
||||
void WARNING(char *fmt, ...)
|
||||
|
@ -553,10 +552,10 @@ void WARNING(char *fmt, ...)
|
|||
extern char *cmdname;
|
||||
va_list varg;
|
||||
|
||||
fflush(stdout);
|
||||
fprintf(stderr, "%s: ", cmdname);
|
||||
Bflush(&stdout);
|
||||
Bprint(&stderr, "%s: ", cmdname);
|
||||
va_start(varg, fmt);
|
||||
vfprintf(stderr, fmt, varg);
|
||||
Bvprint(&stderr, fmt, varg);
|
||||
va_end(varg);
|
||||
error();
|
||||
}
|
||||
|
@ -566,13 +565,13 @@ void error()
|
|||
extern Node *curnode;
|
||||
int line;
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
Bprint(&stderr, "\n");
|
||||
if (compile_time != 2 && NR && *NR > 0) {
|
||||
if (strcmp(*FILENAME, "-") != 0)
|
||||
fprintf(stderr, " input record %s:%d", *FILENAME, (int) (*FNR));
|
||||
Bprint(&stderr, " input record %s:%d", *FILENAME, (int) (*FNR));
|
||||
else
|
||||
fprintf(stderr, " input record number %d", (int) (*FNR));
|
||||
fprintf(stderr, "\n");
|
||||
Bprint(&stderr, " input record number %d", (int) (*FNR));
|
||||
Bprint(&stderr, "\n");
|
||||
}
|
||||
if (compile_time != 2 && curnode)
|
||||
line = curnode->lineno;
|
||||
|
@ -580,14 +579,14 @@ void error()
|
|||
line = lineno;
|
||||
else
|
||||
line = -1;
|
||||
if (compile_time == 1 && cursource() != NULL){
|
||||
if (compile_time == 1 && cursource() != nil){
|
||||
if(line >= 0)
|
||||
fprintf(stderr, " source %s:%d", cursource(), line);
|
||||
Bprint(&stderr, " source %s:%d", cursource(), line);
|
||||
else
|
||||
fprintf(stderr, " source file %s", cursource());
|
||||
Bprint(&stderr, " source file %s", cursource());
|
||||
}else if(line >= 0)
|
||||
fprintf(stderr, " source line %d", line);
|
||||
fprintf(stderr, "\n");
|
||||
Bprint(&stderr, " source line %d", line);
|
||||
Bprint(&stderr, "\n");
|
||||
eprint();
|
||||
}
|
||||
|
||||
|
@ -607,23 +606,23 @@ void eprint(void) /* try to print context around error */
|
|||
;
|
||||
while (*p == '\n')
|
||||
p++;
|
||||
fprintf(stderr, " context is\n\t");
|
||||
Bprint(&stderr, " context is\n\t");
|
||||
for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
|
||||
;
|
||||
for ( ; p < q; p++)
|
||||
if (*p)
|
||||
putc(*p, stderr);
|
||||
fprintf(stderr, " >>> ");
|
||||
Bputc(&stderr, *p);
|
||||
Bprint(&stderr, " >>> ");
|
||||
for ( ; p < ep; p++)
|
||||
if (*p)
|
||||
putc(*p, stderr);
|
||||
fprintf(stderr, " <<< ");
|
||||
Bputc(&stderr, *p);
|
||||
Bprint(&stderr, " <<< ");
|
||||
if (*ep)
|
||||
while ((c = input()) != '\n' && c != '\0' && c != EOF) {
|
||||
putc(c, stderr);
|
||||
Bputc(&stderr, c);
|
||||
bclass(c);
|
||||
}
|
||||
putc('\n', stderr);
|
||||
Bputc(&stderr, '\n');
|
||||
ep = ebuf;
|
||||
}
|
||||
|
||||
|
@ -642,12 +641,10 @@ void bclass(int c)
|
|||
double errcheck(double x, char *s)
|
||||
{
|
||||
|
||||
if (errno == EDOM) {
|
||||
errno = 0;
|
||||
if (isNaN(x)) {
|
||||
WARNING("%s argument out of domain", s);
|
||||
x = 1;
|
||||
} else if (errno == ERANGE) {
|
||||
errno = 0;
|
||||
} else if (isInf(x, 1) || isInf(x, -1)) {
|
||||
WARNING("%s result out of range", s);
|
||||
x = 1;
|
||||
}
|
||||
|
@ -668,7 +665,6 @@ int isclvar(char *s) /* is s of form var=something ? */
|
|||
|
||||
/* strtod is supposed to be a proper test of what's a valid number */
|
||||
|
||||
#include <math.h>
|
||||
int is_number(char *s)
|
||||
{
|
||||
double r;
|
||||
|
@ -699,9 +695,8 @@ int is_number(char *s)
|
|||
return 0; /* can't be a number */
|
||||
}
|
||||
|
||||
errno = 0;
|
||||
r = strtod(s, &ep);
|
||||
if (ep == s || r == HUGE_VAL || errno == ERANGE)
|
||||
if (ep == s || isInf(r, 1) || isInf(r, -1))
|
||||
return 0;
|
||||
while (*ep == ' ' || *ep == '\t' || *ep == '\n')
|
||||
ep++;
|
||||
|
|
|
@ -24,21 +24,21 @@ THIS SOFTWARE.
|
|||
|
||||
char *version = "version 19990602";
|
||||
|
||||
#define DEBUG
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <signal.h>
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include "awk.h"
|
||||
#include "y.tab.h"
|
||||
|
||||
extern char **environ;
|
||||
extern int nfields;
|
||||
|
||||
Biobuf stdin;
|
||||
Biobuf stdout;
|
||||
Biobuf stderr;
|
||||
|
||||
int dbg = 0;
|
||||
char *cmdname; /* gets argv[0] for error messages */
|
||||
extern FILE *yyin; /* lex input file */
|
||||
extern Biobuf *yyin; /* lex input file */
|
||||
char *lexprog; /* points to program argument if it exists */
|
||||
extern int errorflag; /* non-zero if any syntax errors; set by yyerror */
|
||||
int compile_time = 2; /* for error printing: */
|
||||
|
@ -50,18 +50,23 @@ int curpfile = 0; /* current filename */
|
|||
|
||||
int safe = 0; /* 1 => "safe" mode */
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
void main(int argc, char *argv[])
|
||||
{
|
||||
char *fs = NULL, *marg;
|
||||
char *fs = nil, *marg;
|
||||
int temp;
|
||||
|
||||
Binit(&stdin, 0, OREAD);
|
||||
Binit(&stdout, 1, OWRITE);
|
||||
Binit(&stderr, 2, OWRITE);
|
||||
|
||||
cmdname = argv[0];
|
||||
if (argc == 1) {
|
||||
fprintf(stderr, "Usage: %s [-F fieldsep] [-mf n] [-mr n] [-v var=value] [-f programfile | 'program'] [file ...]\n", cmdname);
|
||||
exit(1);
|
||||
Bprint(&stderr, "Usage: %s [-F fieldsep] [-mf n] [-mr n] [-v var=value] [-f programfile | 'program'] [file ...]\n", cmdname);
|
||||
exits("usage");
|
||||
}
|
||||
signal(SIGFPE, fpecatch);
|
||||
yyin = NULL;
|
||||
|
||||
atnotify(handler, 1);
|
||||
yyin = nil;
|
||||
symtab = makesymtab(NSYMTAB);
|
||||
while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
|
||||
if (strcmp(argv[1], "--") == 0) { /* explicit end of args */
|
||||
|
@ -94,7 +99,7 @@ int main(int argc, char *argv[])
|
|||
else if (argc > 1 && argv[1][0] != 0)
|
||||
fs = &argv[1][0];
|
||||
}
|
||||
if (fs == NULL || *fs == '\0')
|
||||
if (fs == nil || *fs == '\0')
|
||||
WARNING("field separator FS is empty");
|
||||
break;
|
||||
case 'v': /* -v a=1 to be done NOW. one -v for each */
|
||||
|
@ -120,11 +125,11 @@ int main(int argc, char *argv[])
|
|||
dbg = atoi(&argv[1][2]);
|
||||
if (dbg == 0)
|
||||
dbg = 1;
|
||||
printf("awk %s\n", version);
|
||||
print("awk %s\n", version);
|
||||
break;
|
||||
case 'V': /* added for exptools "standard" */
|
||||
printf("awk %s\n", version);
|
||||
exit(0);
|
||||
print("awk %s\n", version);
|
||||
exits(0);
|
||||
break;
|
||||
default:
|
||||
WARNING("unknown option %s ignored", argv[1]);
|
||||
|
@ -137,10 +142,10 @@ int main(int argc, char *argv[])
|
|||
if (npfile == 0) { /* no -f; first argument is program */
|
||||
if (argc <= 1) {
|
||||
if (dbg)
|
||||
exit(0);
|
||||
exits(0);
|
||||
FATAL("no program given");
|
||||
}
|
||||
dprintf( ("program = |%s|\n", argv[1]) );
|
||||
dprint( ("program = |%s|\n", argv[1]) );
|
||||
lexprog = argv[1];
|
||||
argc--;
|
||||
argv++;
|
||||
|
@ -149,20 +154,20 @@ int main(int argc, char *argv[])
|
|||
syminit();
|
||||
compile_time = 1;
|
||||
argv[0] = cmdname; /* put prog name at front of arglist */
|
||||
dprintf( ("argc=%d, argv[0]=%s\n", argc, argv[0]) );
|
||||
dprint( ("argc=%d, argv[0]=%s\n", argc, argv[0]) );
|
||||
arginit(argc, argv);
|
||||
if (!safe)
|
||||
envinit(environ);
|
||||
yyparse();
|
||||
if (fs)
|
||||
*FS = qstring(fs, '\0');
|
||||
dprintf( ("errorflag=%d\n", errorflag) );
|
||||
dprint( ("errorflag=%d\n", errorflag) );
|
||||
if (errorflag == 0) {
|
||||
compile_time = 0;
|
||||
run(winner);
|
||||
} else
|
||||
bracecheck();
|
||||
return(errorflag);
|
||||
if(errorflag)
|
||||
exits("error");
|
||||
exits(0);
|
||||
}
|
||||
|
||||
int pgetc(void) /* get 1 character from awk program */
|
||||
|
@ -170,20 +175,20 @@ int pgetc(void) /* get 1 character from awk program */
|
|||
int c;
|
||||
|
||||
for (;;) {
|
||||
if (yyin == NULL) {
|
||||
if (yyin == nil) {
|
||||
if (curpfile >= npfile)
|
||||
return EOF;
|
||||
if (strcmp(pfile[curpfile], "-") == 0)
|
||||
yyin = stdin;
|
||||
else if ((yyin = fopen(pfile[curpfile], "r")) == NULL)
|
||||
yyin = &stdin;
|
||||
else if ((yyin = Bopen(pfile[curpfile], OREAD)) == nil)
|
||||
FATAL("can't open file %s", pfile[curpfile]);
|
||||
lineno = 1;
|
||||
}
|
||||
if ((c = getc(yyin)) != EOF)
|
||||
if ((c = Bgetc(yyin)) != EOF)
|
||||
return c;
|
||||
if (yyin != stdin)
|
||||
fclose(yyin);
|
||||
yyin = NULL;
|
||||
if (yyin != &stdin)
|
||||
Bterm(yyin);
|
||||
yyin = nil;
|
||||
curpfile++;
|
||||
}
|
||||
}
|
||||
|
@ -193,5 +198,5 @@ char *cursource(void) /* current source file name */
|
|||
if (npfile > 0)
|
||||
return pfile[curpfile];
|
||||
else
|
||||
return NULL;
|
||||
return nil;
|
||||
}
|
||||
|
|
|
@ -28,9 +28,9 @@ THIS SOFTWARE.
|
|||
* it finds the indices in y.tab.h, produced by yacc.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include "awk.h"
|
||||
#include "y.tab.h"
|
||||
|
||||
|
@ -39,7 +39,7 @@ struct xx
|
|||
char *name;
|
||||
char *pname;
|
||||
} proc[] = {
|
||||
{ PROGRAM, "program", NULL },
|
||||
{ PROGRAM, "program", nil },
|
||||
{ BOR, "boolop", " || " },
|
||||
{ AND, "boolop", " && " },
|
||||
{ NOT, "boolop", " !" },
|
||||
|
@ -49,13 +49,13 @@ struct xx
|
|||
{ LT, "relop", " < " },
|
||||
{ GE, "relop", " >= " },
|
||||
{ GT, "relop", " > " },
|
||||
{ ARRAY, "array", NULL },
|
||||
{ ARRAY, "array", nil },
|
||||
{ INDIRECT, "indirect", "$(" },
|
||||
{ SUBSTR, "substr", "substr" },
|
||||
{ SUB, "sub", "sub" },
|
||||
{ GSUB, "gsub", "gsub" },
|
||||
{ INDEX, "sindex", "sindex" },
|
||||
{ SPRINTF, "awksprintf", "sprintf " },
|
||||
{ SPRINTF, "awksprintf", "sprintf" },
|
||||
{ ADD, "arith", " + " },
|
||||
{ MINUS, "arith", " - " },
|
||||
{ MULT, "arith", " * " },
|
||||
|
@ -68,8 +68,8 @@ struct xx
|
|||
{ PREDECR, "incrdecr", "--" },
|
||||
{ POSTDECR, "incrdecr", "--" },
|
||||
{ CAT, "cat", " " },
|
||||
{ PASTAT, "pastat", NULL },
|
||||
{ PASTAT2, "dopa2", NULL },
|
||||
{ PASTAT, "pastat", nil },
|
||||
{ PASTAT2, "dopa2", nil },
|
||||
{ MATCH, "matchop", " ~ " },
|
||||
{ NOTMATCH, "matchop", " !~ " },
|
||||
{ MATCHFCN, "matchop", "matchop" },
|
||||
|
@ -110,59 +110,62 @@ struct xx
|
|||
char *table[SIZE];
|
||||
char *names[SIZE];
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
void main(int, char**)
|
||||
{
|
||||
struct xx *p;
|
||||
int i, n, tok;
|
||||
char c;
|
||||
FILE *fp;
|
||||
char buf[200], name[200], def[200];
|
||||
int i, tok;
|
||||
Biobuf *fp;
|
||||
char *buf, *toks[3];
|
||||
|
||||
printf("#include <stdio.h>\n");
|
||||
printf("#include \"awk.h\"\n");
|
||||
printf("#include \"y.tab.h\"\n\n");
|
||||
print("#include <u.h>\n");
|
||||
print("#include <libc.h>\n");
|
||||
print("#include <bio.h>\n");
|
||||
print("#include \"awk.h\"\n");
|
||||
print("#include \"y.tab.h\"\n\n");
|
||||
for (i = SIZE; --i >= 0; )
|
||||
names[i] = "";
|
||||
|
||||
if ((fp = fopen("y.tab.h", "r")) == NULL) {
|
||||
fprintf(stderr, "maketab can't open y.tab.h!\n");
|
||||
exit(1);
|
||||
if ((fp = Bopen("y.tab.h", OREAD)) == nil) {
|
||||
fprint(2, "maketab can't open y.tab.h!\n");
|
||||
exits("can't open y.tab.h");
|
||||
}
|
||||
printf("static char *printname[%d] = {\n", SIZE);
|
||||
print("static char *printname[%d] = {\n", SIZE);
|
||||
i = 0;
|
||||
while (fgets(buf, sizeof buf, fp) != NULL) {
|
||||
n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok);
|
||||
if (c != '#' || (n != 4 && strcmp(def,"define") != 0)) /* not a valid #define */
|
||||
while ((buf = Brdline(fp, '\n')) != nil) {
|
||||
buf[Blinelen(fp)-1] = '\0';
|
||||
tokenize(buf, toks, 3);
|
||||
if (toks[0] == nil || strcmp("#define", toks[0]) != 0) /* not a valid #define */
|
||||
continue;
|
||||
tok = strtol(toks[2], nil, 10);
|
||||
if (tok < FIRSTTOKEN || tok > LASTTOKEN) {
|
||||
fprintf(stderr, "maketab funny token %d %s\n", tok, buf);
|
||||
exit(1);
|
||||
fprint(2, "maketab funny token %d %s\n", tok, buf);
|
||||
exits("funny token");
|
||||
}
|
||||
names[tok-FIRSTTOKEN] = (char *) malloc(strlen(name)+1);
|
||||
strcpy(names[tok-FIRSTTOKEN], name);
|
||||
printf("\t(char *) \"%s\",\t/* %d */\n", name, tok);
|
||||
names[tok-FIRSTTOKEN] = (char *) malloc(strlen(toks[1])+1);
|
||||
strcpy(names[tok-FIRSTTOKEN], toks[1]);
|
||||
print("\t(char *) \"%s\",\t/* %d */\n", toks[1], tok);
|
||||
i++;
|
||||
}
|
||||
printf("};\n\n");
|
||||
print("};\n\n");
|
||||
|
||||
for (p=proc; p->token!=0; p++)
|
||||
table[p->token-FIRSTTOKEN] = p->name;
|
||||
printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
|
||||
print("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
|
||||
for (i=0; i<SIZE; i++)
|
||||
if (table[i]==0)
|
||||
printf("\tnullproc,\t/* %s */\n", names[i]);
|
||||
print("\tnullproc,\t/* %s */\n", names[i]);
|
||||
else
|
||||
printf("\t%s,\t/* %s */\n", table[i], names[i]);
|
||||
printf("};\n\n");
|
||||
print("\t%s,\t/* %s */\n", table[i], names[i]);
|
||||
print("};\n\n");
|
||||
|
||||
printf("char *tokname(int n)\n"); /* print a tokname() function */
|
||||
printf("{\n");
|
||||
printf(" static char buf[100];\n\n");
|
||||
printf(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n");
|
||||
printf(" sprintf(buf, \"token %%d\", n);\n");
|
||||
printf(" return buf;\n");
|
||||
printf(" }\n");
|
||||
printf(" return printname[n-FIRSTTOKEN];\n");
|
||||
printf("}\n");
|
||||
return 0;
|
||||
print("char *tokname(int n)\n"); /* print a tokname() function */
|
||||
print("{\n");
|
||||
print(" static char buf[100];\n\n");
|
||||
print(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n");
|
||||
print(" sprint(buf, \"token %%d\", n);\n");
|
||||
print(" return buf;\n");
|
||||
print(" }\n");
|
||||
print(" return printname[n-FIRSTTOKEN];\n");
|
||||
print("}\n");
|
||||
exits(0);
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ OFILES=re.$O\
|
|||
main.$O\
|
||||
parse.$O\
|
||||
proctab.$O\
|
||||
popen.$O\
|
||||
tran.$O\
|
||||
lib.$O\
|
||||
run.$O\
|
||||
|
@ -28,11 +29,6 @@ UPDATE=\
|
|||
${TARG:%=/386/bin/%}\
|
||||
|
||||
</sys/src/cmd/mkone
|
||||
CFLAGS=-c -D_REGEXP_EXTENSION -D_RESEARCH_SOURCE -D_BSD_EXTENSION -DUTF
|
||||
YFLAGS=-S -d -v
|
||||
CC=pcc
|
||||
LD=pcc
|
||||
cpuobjtype=`{sed -n 's/^O=//p' /$cputype/mkfile}
|
||||
|
||||
y.tab.h awkgram.c: $YFILES
|
||||
$YACC -o awkgram.c $YFLAGS $prereq
|
||||
|
@ -43,10 +39,10 @@ clean:V:
|
|||
nuke:V:
|
||||
rm -f *.[$OS] [$OS].out [$OS].maketab y.tab.? y.debug y.output awkgram.c proctab.c $TARG
|
||||
|
||||
proctab.c: $cpuobjtype.maketab
|
||||
./$cpuobjtype.maketab >proctab.c
|
||||
proctab.c: $O.maketab
|
||||
./$O.maketab >proctab.c
|
||||
|
||||
$cpuobjtype.maketab: y.tab.h maketab.c
|
||||
$O.maketab: y.tab.h maketab.c
|
||||
objtype=$cputype
|
||||
mk maketab.$cputype
|
||||
|
||||
|
|
|
@ -22,10 +22,9 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
|||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
#define DEBUG
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include "awk.h"
|
||||
#include "y.tab.h"
|
||||
|
||||
|
@ -34,9 +33,9 @@ Node *nodealloc(int n)
|
|||
Node *x;
|
||||
|
||||
x = (Node *) malloc(sizeof(Node) + (n-1)*sizeof(Node *));
|
||||
if (x == NULL)
|
||||
if (x == nil)
|
||||
FATAL("out of space in nodealloc");
|
||||
x->nnext = NULL;
|
||||
x->nnext = nil;
|
||||
x->lineno = lineno;
|
||||
return(x);
|
||||
}
|
||||
|
@ -220,11 +219,11 @@ Node *linkum(Node *a, Node *b)
|
|||
|
||||
if (errorflag) /* don't link things that are wrong */
|
||||
return a;
|
||||
if (a == NULL)
|
||||
if (a == nil)
|
||||
return(b);
|
||||
else if (b == NULL)
|
||||
else if (b == nil)
|
||||
return(a);
|
||||
for (c = a; c->nnext != NULL; c = c->nnext)
|
||||
for (c = a; c->nnext != nil; c = c->nnext)
|
||||
;
|
||||
c->nnext = b;
|
||||
return(a);
|
||||
|
@ -245,7 +244,7 @@ void defn(Cell *v, Node *vl, Node *st) /* turn on FCN bit in definition, */
|
|||
for (p = vl; p; p = p->nnext)
|
||||
n++;
|
||||
v->fval = n;
|
||||
dprintf( ("defining func %s (%d args)\n", v->nval, n) );
|
||||
dprint( ("defining func %s (%d args)\n", v->nval, n) );
|
||||
}
|
||||
|
||||
int isarg(char *s) /* is s in argument list for current function? */
|
||||
|
@ -262,7 +261,7 @@ int isarg(char *s) /* is s in argument list for current function? */
|
|||
|
||||
int ptoi(void *p) /* convert pointer to integer */
|
||||
{
|
||||
return (int) (long) p; /* swearing that p fits, of course */
|
||||
return (int) (vlong) p; /* swearing that p fits, of course */
|
||||
}
|
||||
|
||||
Node *itonp(int i) /* and vice versa */
|
||||
|
|
|
@ -44,7 +44,6 @@ extern void quoted(char **, char **, char *);
|
|||
extern int match(void *, char *, char *);
|
||||
extern int pmatch(void *, char *, char *);
|
||||
extern int nematch(void *, char *, char *);
|
||||
extern int countposn(char *, int);
|
||||
extern void overflow(void);
|
||||
|
||||
extern int pgetc(void);
|
||||
|
@ -100,7 +99,7 @@ extern void makefields(int, int);
|
|||
extern void growfldtab(int n);
|
||||
extern int getrec(char **, int *, int);
|
||||
extern void nextfile(void);
|
||||
extern int readrec(char **buf, int *bufsize, FILE *inf);
|
||||
extern int readrec(char **buf, int *bufsize, Biobuf *inf);
|
||||
extern char *getargv(int);
|
||||
extern void setclvar(char *);
|
||||
extern void fldbld(void);
|
||||
|
@ -110,7 +109,7 @@ extern int refldbld(char *, char *);
|
|||
extern void recbld(void);
|
||||
extern Cell *fieldadr(int);
|
||||
extern void yyerror(char *);
|
||||
extern void fpecatch(int);
|
||||
extern int handler(void*, char*);
|
||||
extern void bracecheck(void);
|
||||
extern void bcheck2(int, int, int);
|
||||
extern void SYNTAX(char *, ...);
|
||||
|
@ -165,13 +164,13 @@ extern Cell *instat(Node **, int);
|
|||
extern Cell *bltin(Node **, int);
|
||||
extern Cell *printstat(Node **, int);
|
||||
extern Cell *nullproc(Node **, int);
|
||||
extern FILE *redirect(int, Node *);
|
||||
extern FILE *openfile(int, char *);
|
||||
extern char *filename(FILE *);
|
||||
extern Biobuf *redirect(int, Node *);
|
||||
extern Biobuf *openfile(int, char *);
|
||||
extern char *filename(Biobuf *);
|
||||
extern Cell *closefile(Node **, int);
|
||||
extern void closeall(void);
|
||||
extern Cell *sub(Node **, int);
|
||||
extern Cell *gsub(Node **, int);
|
||||
|
||||
extern FILE *popen(const char *, const char *);
|
||||
extern int pclose(FILE *);
|
||||
extern Biobuf *popen(char *, int);
|
||||
extern int pclose(Biobuf *);
|
||||
|
|
|
@ -22,18 +22,13 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
|||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
|
||||
#define DEBUG
|
||||
#include <stdio.h>
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <ctype.h>
|
||||
#include <setjmp.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <bio.h>
|
||||
#include <regexp.h>
|
||||
#include "awk.h"
|
||||
#include "y.tab.h"
|
||||
#include "regexp.h"
|
||||
|
||||
/* This file provides the interface between the main body of
|
||||
* awk and the pattern matching package. It preprocesses
|
||||
|
@ -198,11 +193,11 @@ pmatch(void *p, char *s, char *start)
|
|||
{
|
||||
Resub m;
|
||||
|
||||
m.s.sp = start;
|
||||
m.e.ep = 0;
|
||||
m.sp = start;
|
||||
m.ep = 0;
|
||||
if (regexec((Reprog *) p, (char *) s, &m, 1)) {
|
||||
patbeg = m.s.sp;
|
||||
patlen = m.e.ep-m.s.sp;
|
||||
patbeg = m.sp;
|
||||
patlen = m.ep-m.sp;
|
||||
return 1;
|
||||
}
|
||||
patlen = -1;
|
||||
|
@ -250,7 +245,7 @@ quoted(char **s, char **to, char *end) /* handle escaped sequence */
|
|||
{
|
||||
char *p = *s;
|
||||
char *t = *to;
|
||||
wchar_t c;
|
||||
Rune c;
|
||||
|
||||
switch(c = *p++) {
|
||||
case 't':
|
||||
|
@ -273,8 +268,8 @@ quoted(char **s, char **to, char *end) /* handle escaped sequence */
|
|||
*t++ = '\\';
|
||||
if (c == 'x') { /* hexadecimal goo follows */
|
||||
c = hexstr(&p);
|
||||
if (t < end-MB_CUR_MAX)
|
||||
t += wctomb(t, c);
|
||||
if (t < end-UTFmax)
|
||||
t += runelen(c);
|
||||
else overflow();
|
||||
*to = t;
|
||||
*s = p;
|
||||
|
@ -294,21 +289,6 @@ quoted(char **s, char **to, char *end) /* handle escaped sequence */
|
|||
*s = p;
|
||||
*to = t;
|
||||
}
|
||||
/* count rune positions */
|
||||
int
|
||||
countposn(char *s, int n)
|
||||
{
|
||||
int i, j;
|
||||
char *end;
|
||||
|
||||
for (i = 0, end = s+n; *s && s < end; i++){
|
||||
j = mblen(s, n);
|
||||
if(j <= 0)
|
||||
j = 1;
|
||||
s += j;
|
||||
}
|
||||
return(i);
|
||||
}
|
||||
|
||||
/* pattern package error handler */
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -22,12 +22,10 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
|||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
#define DEBUG
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <bio.h>
|
||||
#include "awk.h"
|
||||
#include "y.tab.h"
|
||||
|
||||
|
@ -46,7 +44,7 @@ Awkfloat *NF; /* number of fields in current record */
|
|||
Awkfloat *NR; /* number of current record */
|
||||
Awkfloat *FNR; /* number of current record in current file */
|
||||
char **FILENAME; /* current filename argument */
|
||||
Awkfloat *ARGC; /* number of arguments from command line */
|
||||
Awkfloat *AARGC; /* number of arguments from command line */
|
||||
char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
|
||||
Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
|
||||
Awkfloat *RLENGTH; /* length of same */
|
||||
|
@ -101,12 +99,12 @@ void arginit(int ac, char **av) /* set up ARGV and ARGC */
|
|||
int i;
|
||||
char temp[50];
|
||||
|
||||
ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
|
||||
AARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
|
||||
cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
|
||||
ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
|
||||
cp->sval = (char *) ARGVtab;
|
||||
for (i = 0; i < ac; i++) {
|
||||
sprintf(temp, "%d", i);
|
||||
sprint(temp, "%d", i);
|
||||
if (is_number(*av))
|
||||
setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
|
||||
else
|
||||
|
@ -124,7 +122,7 @@ void envinit(char **envp) /* set up ENVIRON variable */
|
|||
ENVtab = makesymtab(NSYMTAB);
|
||||
cp->sval = (char *) ENVtab;
|
||||
for ( ; *envp; envp++) {
|
||||
if ((p = strchr(*envp, '=')) == NULL)
|
||||
if ((p = strchr(*envp, '=')) == nil)
|
||||
continue;
|
||||
*p++ = 0; /* split into two strings at = */
|
||||
if (is_number(p))
|
||||
|
@ -142,9 +140,9 @@ Array *makesymtab(int n) /* make a new symbol table */
|
|||
|
||||
ap = (Array *) malloc(sizeof(Array));
|
||||
tp = (Cell **) calloc(n, sizeof(Cell *));
|
||||
if (ap == NULL || tp == NULL)
|
||||
if (ap == nil || tp == nil)
|
||||
FATAL("out of space in makesymtab");
|
||||
ap->nelem = 0;
|
||||
ap->nelemt = 0;
|
||||
ap->size = n;
|
||||
ap->tab = tp;
|
||||
return(ap);
|
||||
|
@ -159,10 +157,10 @@ void freesymtab(Cell *ap) /* free a symbol table */
|
|||
if (!isarr(ap))
|
||||
return;
|
||||
tp = (Array *) ap->sval;
|
||||
if (tp == NULL)
|
||||
if (tp == nil)
|
||||
return;
|
||||
for (i = 0; i < tp->size; i++) {
|
||||
for (cp = tp->tab[i]; cp != NULL; cp = temp) {
|
||||
for (cp = tp->tab[i]; cp != nil; cp = temp) {
|
||||
xfree(cp->nval);
|
||||
if (freeable(cp))
|
||||
xfree(cp->sval);
|
||||
|
@ -178,14 +176,14 @@ void freesymtab(Cell *ap) /* free a symbol table */
|
|||
void freeelem(Cell *ap, char *s) /* free elem s from ap (i.e., ap["s"] */
|
||||
{
|
||||
Array *tp;
|
||||
Cell *p, *prev = NULL;
|
||||
Cell *p, *prev = nil;
|
||||
int h;
|
||||
|
||||
tp = (Array *) ap->sval;
|
||||
h = hash(s, tp->size);
|
||||
for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
|
||||
for (p = tp->tab[h]; p != nil; prev = p, p = p->cnext)
|
||||
if (strcmp(s, p->nval) == 0) {
|
||||
if (prev == NULL) /* 1st one */
|
||||
if (prev == nil) /* 1st one */
|
||||
tp->tab[h] = p->cnext;
|
||||
else /* middle somewhere */
|
||||
prev->cnext = p->cnext;
|
||||
|
@ -193,7 +191,7 @@ void freeelem(Cell *ap, char *s) /* free elem s from ap (i.e., ap["s"] */
|
|||
xfree(p->sval);
|
||||
free(p->nval);
|
||||
free(p);
|
||||
tp->nelem--;
|
||||
tp->nelemt--;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -203,13 +201,13 @@ Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
|
|||
int h;
|
||||
Cell *p;
|
||||
|
||||
if (n != NULL && (p = lookup(n, tp)) != NULL) {
|
||||
dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
|
||||
if (n != nil && (p = lookup(n, tp)) != nil) {
|
||||
dprint( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
|
||||
p, p->nval, p->sval, p->fval, p->tval) );
|
||||
return(p);
|
||||
}
|
||||
p = (Cell *) malloc(sizeof(Cell));
|
||||
if (p == NULL)
|
||||
if (p == nil)
|
||||
FATAL("out of space for symbol table at %s", n);
|
||||
p->nval = tostring(n);
|
||||
p->sval = s ? tostring(s) : tostring("");
|
||||
|
@ -217,13 +215,13 @@ Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
|
|||
p->tval = t;
|
||||
p->csub = CUNK;
|
||||
p->ctype = OCELL;
|
||||
tp->nelem++;
|
||||
if (tp->nelem > FULLTAB * tp->size)
|
||||
tp->nelemt++;
|
||||
if (tp->nelemt > FULLTAB * tp->size)
|
||||
rehash(tp);
|
||||
h = hash(n, tp->size);
|
||||
p->cnext = tp->tab[h];
|
||||
tp->tab[h] = p;
|
||||
dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
|
||||
dprint( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
|
||||
p, p->nval, p->sval, p->fval, p->tval) );
|
||||
return(p);
|
||||
}
|
||||
|
@ -244,7 +242,7 @@ void rehash(Array *tp) /* rehash items in small table into big one */
|
|||
|
||||
nsz = GROWTAB * tp->size;
|
||||
np = (Cell **) calloc(nsz, sizeof(Cell *));
|
||||
if (np == NULL) /* can't do it, but can keep running. */
|
||||
if (np == nil) /* can't do it, but can keep running. */
|
||||
return; /* someone else will run out later. */
|
||||
for (i = 0; i < tp->size; i++) {
|
||||
for (cp = tp->tab[i]; cp; cp = op) {
|
||||
|
@ -265,10 +263,10 @@ Cell *lookup(char *s, Array *tp) /* look for s in tp */
|
|||
int h;
|
||||
|
||||
h = hash(s, tp->size);
|
||||
for (p = tp->tab[h]; p != NULL; p = p->cnext)
|
||||
for (p = tp->tab[h]; p != nil; p = p->cnext)
|
||||
if (strcmp(s, p->nval) == 0)
|
||||
return(p); /* found it */
|
||||
return(NULL); /* not found */
|
||||
return(nil); /* not found */
|
||||
}
|
||||
|
||||
Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
|
||||
|
@ -282,7 +280,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
|
|||
fldno = atoi(vp->nval);
|
||||
if (fldno > *NF)
|
||||
newfld(fldno);
|
||||
dprintf( ("setting field %d to %g\n", fldno, f) );
|
||||
dprint( ("setting field %d to %g\n", fldno, f) );
|
||||
} else if (isrec(vp)) {
|
||||
donefld = 0; /* mark $1... invalid */
|
||||
donerec = 1;
|
||||
|
@ -291,7 +289,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
|
|||
xfree(vp->sval); /* free any previous string */
|
||||
vp->tval &= ~STR; /* mark string invalid */
|
||||
vp->tval |= NUM; /* mark number ok */
|
||||
dprintf( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
|
||||
dprint( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
|
||||
return vp->fval = f;
|
||||
}
|
||||
|
||||
|
@ -310,7 +308,7 @@ char *setsval(Cell *vp, char *s) /* set string val of a Cell */
|
|||
char *t;
|
||||
int fldno;
|
||||
|
||||
dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
|
||||
dprint( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
|
||||
if ((vp->tval & (NUM | STR)) == 0)
|
||||
funnyvar(vp, "assign to");
|
||||
if (isfld(vp)) {
|
||||
|
@ -318,7 +316,7 @@ char *setsval(Cell *vp, char *s) /* set string val of a Cell */
|
|||
fldno = atoi(vp->nval);
|
||||
if (fldno > *NF)
|
||||
newfld(fldno);
|
||||
dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
|
||||
dprint( ("setting field %d to %s (%p)\n", fldno, s, s) );
|
||||
} else if (isrec(vp)) {
|
||||
donefld = 0; /* mark $1... invalid */
|
||||
donerec = 1;
|
||||
|
@ -329,7 +327,7 @@ char *setsval(Cell *vp, char *s) /* set string val of a Cell */
|
|||
if (freeable(vp))
|
||||
xfree(vp->sval);
|
||||
vp->tval &= ~DONTFREE;
|
||||
dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
|
||||
dprint( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
|
||||
return(vp->sval = t);
|
||||
}
|
||||
|
||||
|
@ -346,7 +344,7 @@ Awkfloat getfval(Cell *vp) /* get float val of a Cell */
|
|||
if (is_number(vp->sval) && !(vp->tval&CON))
|
||||
vp->tval |= NUM; /* make NUM only sparingly */
|
||||
}
|
||||
dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
|
||||
dprint( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
|
||||
return(vp->fval);
|
||||
}
|
||||
|
||||
|
@ -365,14 +363,14 @@ char *getsval(Cell *vp) /* get string val of a Cell */
|
|||
if (freeable(vp))
|
||||
xfree(vp->sval);
|
||||
if (modf(vp->fval, &dtemp) == 0) /* it's integral */
|
||||
sprintf(s, "%.30g", vp->fval);
|
||||
sprint(s, "%.30g", vp->fval);
|
||||
else
|
||||
sprintf(s, *CONVFMT, vp->fval);
|
||||
sprint(s, *CONVFMT, vp->fval);
|
||||
vp->sval = tostring(s);
|
||||
vp->tval &= ~DONTFREE;
|
||||
vp->tval |= STR;
|
||||
}
|
||||
dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
|
||||
dprint( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
|
||||
return(vp->sval);
|
||||
}
|
||||
|
||||
|
@ -381,7 +379,7 @@ char *tostring(char *s) /* make a copy of string s */
|
|||
char *p;
|
||||
|
||||
p = (char *) malloc(strlen(s)+1);
|
||||
if (p == NULL)
|
||||
if (p == nil)
|
||||
FATAL("out of space in tostring on %s", s);
|
||||
strcpy(p, s);
|
||||
return(p);
|
||||
|
@ -393,7 +391,7 @@ char *qstring(char *s, int delim) /* collect string up to next delim */
|
|||
int c, n;
|
||||
char *buf, *bp;
|
||||
|
||||
if ((buf = (char *) malloc(strlen(s)+3)) == NULL)
|
||||
if ((buf = (char *) malloc(strlen(s)+3)) == nil)
|
||||
FATAL( "out of space in qstring(%s)", s);
|
||||
for (bp = buf; (c = *s) != delim; s++) {
|
||||
if (c == '\n')
|
||||
|
@ -429,6 +427,6 @@ char *qstring(char *s, int delim) /* collect string up to next delim */
|
|||
}
|
||||
}
|
||||
}
|
||||
*bp++ = 0;
|
||||
*bp = 0;
|
||||
return buf;
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include <regexp.h>
|
||||
#include "regexp.h"
|
||||
#include "hash.h"
|
||||
|
||||
Hash hash;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include <regexp.h>
|
||||
#include "regexp.h"
|
||||
#include "hash.h"
|
||||
|
||||
enum
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
#include <libc.h>
|
||||
#include <bin.h>
|
||||
#include <bio.h>
|
||||
#include <regexp.h>
|
||||
#include "/sys/src/libregexp/regcomp.h"
|
||||
#include "regexp.h"
|
||||
#include "regcomp.h"
|
||||
#include "dfa.h"
|
||||
|
||||
void rdump(Reprog*);
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include <regexp.h>
|
||||
#include "regexp.h"
|
||||
#include "/sys/src/libregexp/regcomp.h"
|
||||
#include "dfa.h"
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include <regexp.h>
|
||||
#include "regexp.h"
|
||||
#include <ctype.h>
|
||||
#include "dfa.h"
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include "regexp.h"
|
||||
#include "/sys/src/libregexp/regcomp.h"
|
||||
#include "regcomp.h"
|
||||
|
||||
#define TRUE 1
|
||||
#define FALSE 0
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include <regexp.h>
|
||||
#include "regexp.h"
|
||||
#include "dfa.h"
|
||||
|
||||
/***
|
||||
|
|
|
@ -6,12 +6,12 @@ OFILES=\
|
|||
regerror.$O\
|
||||
regexec.$O\
|
||||
regsub.$O\
|
||||
regaux.$O\
|
||||
rregexec.$O\
|
||||
rregsub.$O\
|
||||
regprint.$O\
|
||||
|
||||
HFILES=/sys/include/regexp.h\
|
||||
regcomp.h\
|
||||
regimpl.h\
|
||||
|
||||
UPDATE=\
|
||||
mkfile\
|
||||
|
@ -21,8 +21,8 @@ UPDATE=\
|
|||
|
||||
</sys/src/cmd/mksyslib
|
||||
|
||||
test: test.$O $OFILES
|
||||
$LD -o test $prereq
|
||||
$O.regextest: tests/regextest.$O $LIB
|
||||
$LD -o $target regextest.$O
|
||||
|
||||
test2: test2.$O $OFILES
|
||||
$LD -o test2 $prereq
|
||||
$O.sysregextest: tests/sysregextest.$O
|
||||
$LD -o $target sysregextest.$O
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,6 +1,6 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include "regexp.h"
|
||||
#include <regexp.h>
|
||||
|
||||
void
|
||||
regerror(char *s)
|
||||
|
|
|
@ -1,232 +1,190 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include "regexp.h"
|
||||
#include "regcomp.h"
|
||||
#include <regexp.h>
|
||||
#include "regimpl.h"
|
||||
|
||||
|
||||
/*
|
||||
* return 0 if no match
|
||||
* >0 if a match
|
||||
* <0 if we ran out of _relist space
|
||||
*/
|
||||
static int
|
||||
regexec1(Reprog *progp, /* program to run */
|
||||
char *bol, /* string to run machine on */
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms, /* number of elements at mp */
|
||||
Reljunk *j
|
||||
)
|
||||
typedef struct RethreadQ RethreadQ;
|
||||
struct RethreadQ
|
||||
{
|
||||
int flag=0;
|
||||
Reinst *inst;
|
||||
Relist *tlp;
|
||||
char *s;
|
||||
int i, checkstart;
|
||||
Rune r, *rp, *ep;
|
||||
int n;
|
||||
Relist* tl; /* This list, next list */
|
||||
Relist* nl;
|
||||
Relist* tle; /* ends of this and next list */
|
||||
Relist* nle;
|
||||
int match;
|
||||
char *p;
|
||||
Rethread *head;
|
||||
Rethread **tail;
|
||||
};
|
||||
|
||||
match = 0;
|
||||
checkstart = j->starttype;
|
||||
if(mp)
|
||||
for(i=0; i<ms; i++) {
|
||||
mp[i].sp = 0;
|
||||
mp[i].ep = 0;
|
||||
int
|
||||
regexec(Reprog *prog, char *str, Resub *sem, int msize)
|
||||
{
|
||||
RethreadQ lists[2], *clist, *nlist, *tmp;
|
||||
Rethread *t, *nextthr, **availthr;
|
||||
Reinst *curinst;
|
||||
Rune r;
|
||||
char *sp, *ep, endc;
|
||||
int i, match, first, gen, matchpri, pri;
|
||||
|
||||
if(msize > NSUBEXPM)
|
||||
msize = NSUBEXPM;
|
||||
|
||||
if(prog->startinst->gen != 0) {
|
||||
for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++)
|
||||
curinst->gen = 0;
|
||||
}
|
||||
|
||||
clist = lists;
|
||||
clist->head = nil;
|
||||
clist->tail = &clist->head;
|
||||
nlist = lists + 1;
|
||||
nlist->head = nil;
|
||||
nlist->tail = &nlist->head;
|
||||
|
||||
for(i = 0; i < prog->nthr; i++)
|
||||
prog->thrpool[i] = prog->threads + i;
|
||||
availthr = prog->thrpool + prog->nthr;
|
||||
|
||||
pri = matchpri = gen = match = 0;
|
||||
sp = str;
|
||||
ep = nil;
|
||||
endc = '\0';
|
||||
if(sem != nil && msize > 0) {
|
||||
if(sem->sp != nil)
|
||||
sp = sem->sp;
|
||||
if(sem->ep != nil && *sem->ep != '\0') {
|
||||
ep = sem->ep;
|
||||
endc = *sem->ep;
|
||||
*sem->ep = '\0';
|
||||
}
|
||||
j->relist[0][0].inst = 0;
|
||||
j->relist[1][0].inst = 0;
|
||||
|
||||
/* Execute machine once for each character, including terminal NUL */
|
||||
s = j->starts;
|
||||
do{
|
||||
/* fast check for first char */
|
||||
if(checkstart) {
|
||||
switch(j->starttype) {
|
||||
case RUNE:
|
||||
p = utfrune(s, j->startchar);
|
||||
if(p == 0 || s == j->eol)
|
||||
return match;
|
||||
s = p;
|
||||
break;
|
||||
case BOL:
|
||||
if(s == bol)
|
||||
break;
|
||||
p = utfrune(s, '\n');
|
||||
if(p == 0 || s == j->eol)
|
||||
return match;
|
||||
s = p+1;
|
||||
}
|
||||
r = Runemax + 1;
|
||||
for(; r != L'\0'; sp += i) {
|
||||
gen++;
|
||||
i = chartorune(&r, sp);
|
||||
first = 1;
|
||||
t = clist->head;
|
||||
if(t == nil)
|
||||
goto Start;
|
||||
curinst = t->pc;
|
||||
Again:
|
||||
if(curinst->gen == gen)
|
||||
goto Done;
|
||||
curinst->gen = gen;
|
||||
switch(curinst->op) {
|
||||
case ORUNE:
|
||||
if(r != curinst->r)
|
||||
goto Done;
|
||||
case OANY: /* fallthrough */
|
||||
Any:
|
||||
nextthr = t->next;
|
||||
t->pc = curinst + 1;
|
||||
t->next = nil;
|
||||
*nlist->tail = t;
|
||||
nlist->tail = &t->next;
|
||||
if(nextthr == nil)
|
||||
break;
|
||||
t = nextthr;
|
||||
curinst = t->pc;
|
||||
goto Again;
|
||||
case OCLASS:
|
||||
Class:
|
||||
if(r < curinst->r)
|
||||
goto Done;
|
||||
if(r > curinst->r1) {
|
||||
curinst++;
|
||||
goto Class;
|
||||
}
|
||||
}
|
||||
r = *(uchar*)s;
|
||||
if(r < Runeself)
|
||||
n = 1;
|
||||
else
|
||||
n = chartorune(&r, s);
|
||||
|
||||
/* switch run lists */
|
||||
tl = j->relist[flag];
|
||||
tle = j->reliste[flag];
|
||||
nl = j->relist[flag^=1];
|
||||
nle = j->reliste[flag];
|
||||
nl->inst = 0;
|
||||
|
||||
/* Add first instruction to current list */
|
||||
if(match == 0)
|
||||
_renewemptythread(tl, progp->startinst, ms, s);
|
||||
|
||||
/* Execute machine until current list is empty */
|
||||
for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
|
||||
for(inst = tlp->inst; ; inst = inst->next){
|
||||
switch(inst->type){
|
||||
case RUNE: /* regular character */
|
||||
if(inst->r == r){
|
||||
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
case LBRA:
|
||||
tlp->se.m[inst->subid].sp = s;
|
||||
continue;
|
||||
case RBRA:
|
||||
tlp->se.m[inst->subid].ep = s;
|
||||
continue;
|
||||
case ANY:
|
||||
if(r != '\n')
|
||||
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case ANYNL:
|
||||
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case BOL:
|
||||
if(s == bol || *(s-1) == '\n')
|
||||
continue;
|
||||
break;
|
||||
case EOL:
|
||||
if(s == j->eol || r == 0 || r == '\n')
|
||||
continue;
|
||||
break;
|
||||
case CCLASS:
|
||||
ep = inst->cp->end;
|
||||
for(rp = inst->cp->spans; rp < ep; rp += 2)
|
||||
if(r >= rp[0] && r <= rp[1]){
|
||||
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case NCCLASS:
|
||||
ep = inst->cp->end;
|
||||
for(rp = inst->cp->spans; rp < ep; rp += 2)
|
||||
if(r >= rp[0] && r <= rp[1])
|
||||
break;
|
||||
if(rp == ep)
|
||||
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case OR:
|
||||
/* evaluate right choice later */
|
||||
if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
|
||||
return -1;
|
||||
/* efficiency: advance and re-evaluate */
|
||||
continue;
|
||||
case END: /* Match! */
|
||||
match = 1;
|
||||
tlp->se.m[0].ep = s;
|
||||
if(mp != 0)
|
||||
_renewmatch(mp, ms, &tlp->se);
|
||||
break;
|
||||
nextthr = t->next;
|
||||
t->pc = curinst->a;
|
||||
t->next = nil;
|
||||
*nlist->tail = t;
|
||||
nlist->tail = &t->next;
|
||||
if(nextthr == nil)
|
||||
break;
|
||||
t = nextthr;
|
||||
curinst = t->pc;
|
||||
goto Again;
|
||||
case ONOTNL:
|
||||
if(r != L'\n') {
|
||||
curinst++;
|
||||
goto Again;
|
||||
}
|
||||
goto Done;
|
||||
case OBOL:
|
||||
if(sp == str || sp[-1] == '\n') {
|
||||
curinst++;
|
||||
goto Again;
|
||||
}
|
||||
goto Done;
|
||||
case OEOL:
|
||||
if(r == L'\0' && ep == nil) {
|
||||
curinst++;
|
||||
goto Again;
|
||||
}
|
||||
if(r == L'\n')
|
||||
goto Any;
|
||||
goto Done;
|
||||
case OJMP:
|
||||
curinst = curinst->a;
|
||||
goto Again;
|
||||
case OSPLIT:
|
||||
nextthr = *--availthr;
|
||||
nextthr->pc = curinst->b;
|
||||
if(msize > 0)
|
||||
memcpy(nextthr->sem, t->sem, sizeof(Resub)*msize);
|
||||
nextthr->pri = t->pri;
|
||||
nextthr->next = t->next;
|
||||
t->next = nextthr;
|
||||
curinst = curinst->a;
|
||||
goto Again;
|
||||
case OSAVE:
|
||||
if(curinst->sub < msize)
|
||||
t->sem[curinst->sub].sp = sp;
|
||||
curinst++;
|
||||
goto Again;
|
||||
case OUNSAVE:
|
||||
if(curinst->sub == 0) {
|
||||
/* "Highest" priority is the left-most longest. */
|
||||
if (t->pri > matchpri)
|
||||
goto Done;
|
||||
match = 1;
|
||||
matchpri = t->pri;
|
||||
if(sem != nil && msize > 0) {
|
||||
memcpy(sem, t->sem, sizeof(Resub)*msize);
|
||||
sem->ep = sp;
|
||||
}
|
||||
break;
|
||||
goto Done;
|
||||
}
|
||||
if(curinst->sub < msize)
|
||||
t->sem[curinst->sub].ep = sp;
|
||||
curinst++;
|
||||
goto Again;
|
||||
Done:
|
||||
*availthr++ = t;
|
||||
t = t->next;
|
||||
if(t == nil)
|
||||
break;
|
||||
curinst = t->pc;
|
||||
goto Again;
|
||||
}
|
||||
if(s == j->eol)
|
||||
Start:
|
||||
/* Start again once if we haven't found anything. */
|
||||
if(first == 1 && match == 0) {
|
||||
first = 0;
|
||||
t = *--availthr;
|
||||
if(msize > 0)
|
||||
memset(t->sem, 0, sizeof(Resub)*msize);
|
||||
/* "Lower" priority thread */
|
||||
t->pri = matchpri = pri++;
|
||||
t->next = nil;
|
||||
curinst = prog->startinst;
|
||||
goto Again;
|
||||
}
|
||||
/* If we have a match and no extant threads, we are done. */
|
||||
if(match == 1 && nlist->head == nil)
|
||||
break;
|
||||
checkstart = j->starttype && nl->inst==0;
|
||||
s += n;
|
||||
}while(r);
|
||||
tmp = clist;
|
||||
clist = nlist;
|
||||
nlist = tmp;
|
||||
nlist->head = nil;
|
||||
nlist->tail = &nlist->head;
|
||||
}
|
||||
if(ep != nil)
|
||||
*ep = endc;
|
||||
return match;
|
||||
}
|
||||
|
||||
static int
|
||||
regexec2(Reprog *progp, /* program to run */
|
||||
char *bol, /* string to run machine on */
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms, /* number of elements at mp */
|
||||
Reljunk *j
|
||||
)
|
||||
{
|
||||
int rv;
|
||||
Relist *relist0, *relist1;
|
||||
|
||||
/* mark space */
|
||||
relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
|
||||
if(relist0 == nil)
|
||||
return -1;
|
||||
relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
|
||||
if(relist1 == nil){
|
||||
free(relist1);
|
||||
return -1;
|
||||
}
|
||||
j->relist[0] = relist0;
|
||||
j->relist[1] = relist1;
|
||||
j->reliste[0] = relist0 + BIGLISTSIZE - 2;
|
||||
j->reliste[1] = relist1 + BIGLISTSIZE - 2;
|
||||
|
||||
rv = regexec1(progp, bol, mp, ms, j);
|
||||
free(relist0);
|
||||
free(relist1);
|
||||
return rv;
|
||||
}
|
||||
|
||||
extern int
|
||||
regexec(Reprog *progp, /* program to run */
|
||||
char *bol, /* string to run machine on */
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms) /* number of elements at mp */
|
||||
{
|
||||
Reljunk j;
|
||||
Relist relist0[LISTSIZE], relist1[LISTSIZE];
|
||||
int rv;
|
||||
|
||||
/*
|
||||
* use user-specified starting/ending location if specified
|
||||
*/
|
||||
j.starts = bol;
|
||||
j.eol = 0;
|
||||
if(mp && ms>0){
|
||||
if(mp->sp)
|
||||
j.starts = mp->sp;
|
||||
if(mp->ep)
|
||||
j.eol = mp->ep;
|
||||
}
|
||||
j.starttype = 0;
|
||||
j.startchar = 0;
|
||||
if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
|
||||
j.starttype = RUNE;
|
||||
j.startchar = progp->startinst->r;
|
||||
}
|
||||
if(progp->startinst->type == BOL)
|
||||
j.starttype = BOL;
|
||||
|
||||
/* mark space */
|
||||
j.relist[0] = relist0;
|
||||
j.relist[1] = relist1;
|
||||
j.reliste[0] = relist0 + nelem(relist0) - 2;
|
||||
j.reliste[1] = relist1 + nelem(relist1) - 2;
|
||||
|
||||
rv = regexec1(progp, bol, mp, ms, &j);
|
||||
if(rv >= 0)
|
||||
return rv;
|
||||
rv = regexec2(progp, bol, mp, ms, &j);
|
||||
if(rv >= 0)
|
||||
return rv;
|
||||
return -1;
|
||||
}
|
||||
|
|
104
sys/src/libregexp/regimpl.h
Normal file
104
sys/src/libregexp/regimpl.h
Normal file
|
@ -0,0 +1,104 @@
|
|||
/*
 * Internal constants of the regexp implementation.
 * Values are spelled out so that the two independent numbering
 * sequences (L* and T*, both starting at 0) are easy to see.
 */
enum
{
	/* L*: presumably the token codes produced by lex() -- confirm against the compiler source */
	LANY	= 0,
	LBOL	= 1,
	LCLASS	= 2,
	LEND	= 3,
	LEOL	= 4,
	LLPAR	= 5,
	LOR	= 6,
	LREP	= 7,
	LRPAR	= 8,
	LRUNE	= 9,

	/* T*: presumably the Renode.op values of the parse tree -- confirm against the compiler source */
	TANY	= 0,
	TBOL	= 1,
	TCAT	= 2,
	TCLASS	= 3,
	TEOL	= 4,
	TNOTNL	= 5,
	TOR	= 6,
	TPLUS	= 7,
	TQUES	= 8,
	TRUNE	= 9,
	TSTAR	= 10,
	TSUB	= 11,

	/* number of Resub slots carried by each Rethread */
	NSUBEXPM	= 32
};
|
||||
|
||||
typedef struct Parselex Parselex;
|
||||
typedef struct Renode Renode;
|
||||
|
||||
struct Parselex
|
||||
{
|
||||
/* Parse */
|
||||
Renode *next;
|
||||
Renode *nodes;
|
||||
int sub;
|
||||
int instrs;
|
||||
jmp_buf exitenv;
|
||||
/* Lex */
|
||||
void (*getnextr)(Parselex*);
|
||||
char *rawexp;
|
||||
char *orig;
|
||||
Rune rune;
|
||||
Rune peek;
|
||||
int peeklex;
|
||||
int done;
|
||||
int literal;
|
||||
Rune cpairs[400+2];
|
||||
int nc;
|
||||
};
|
||||
struct Renode
|
||||
{
|
||||
int op;
|
||||
Renode *left;
|
||||
Rune r;
|
||||
union
|
||||
{
|
||||
Rune r1;
|
||||
int sub;
|
||||
Renode *right;
|
||||
};
|
||||
int nclass;
|
||||
};
|
||||
struct Rethread
|
||||
{
|
||||
Reinst *pc;
|
||||
Resub sem[NSUBEXPM];
|
||||
int pri;
|
||||
Rethread *next;
|
||||
};
|
||||
struct Reinst
|
||||
{
|
||||
char op;
|
||||
int gen;
|
||||
Reinst *a;
|
||||
union
|
||||
{
|
||||
Rune r;
|
||||
int sub;
|
||||
};
|
||||
union
|
||||
{
|
||||
Rune r1;
|
||||
Reinst *b;
|
||||
};
|
||||
};
|
||||
|
||||
static int lex(Parselex*);
|
||||
static void getnextr(Parselex*);
|
||||
static void getnextrlit(Parselex*);
|
||||
static void getclass(Parselex*);
|
||||
static Renode *e0(Parselex*);
|
||||
static Renode *e1(Parselex*);
|
||||
static Renode *e2(Parselex*);
|
||||
static Renode *e3(Parselex*);
|
||||
static Renode *buildclass(Parselex*);
|
||||
static Renode *buildclassn(Parselex*);
|
||||
static int pcmp(void*, void*);
|
||||
static Reprog *regcomp1(char*, int, int);
|
||||
static Reinst *compile(Renode*, Reprog*, int);
|
||||
static Reinst *compile1(Renode*, Reinst*, int*, int);
|
||||
static void prtree(Renode*, int, int);
|
66
sys/src/libregexp/regprint.c
Normal file
66
sys/src/libregexp/regprint.c
Normal file
|
@ -0,0 +1,66 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <regexp.h>
|
||||
#include <regimpl.h>
|
||||
|
||||
static int
|
||||
fmtprinst(Fmt *f, Reinst *inst)
|
||||
{
|
||||
int r;
|
||||
|
||||
r = fmtprint(f, "%p ", inst);
|
||||
switch(inst->op) {
|
||||
case ORUNE:
|
||||
r += fmtprint(f, "ORUNE\t%C\n", inst->r);
|
||||
break;
|
||||
case ONOTNL:
|
||||
r += fmtprint(f, "ONOTNL\n");
|
||||
break;
|
||||
case OCLASS:
|
||||
r += fmtprint(f, "OCLASS\t%C-%C %p\n", inst->r, inst->r1, inst->a);
|
||||
break;
|
||||
case OSPLIT:
|
||||
r += fmtprint(f, "OSPLIT\t%p %p\n", inst->a, inst->b);
|
||||
break;
|
||||
case OJMP:
|
||||
r += fmtprint(f, "OJMP \t%p\n", inst->a);
|
||||
break;
|
||||
case OSAVE:
|
||||
r += fmtprint(f, "OSAVE\t%d\n", inst->sub);
|
||||
break;
|
||||
case OUNSAVE:
|
||||
r += fmtprint(f, "OUNSAVE\t%d\n", inst->sub);
|
||||
break;
|
||||
case OANY:
|
||||
r += fmtprint(f, "OANY \t.\n");
|
||||
break;
|
||||
case OEOL:
|
||||
r += fmtprint(f, "OEOL \t$\n");
|
||||
break;
|
||||
case OBOL:
|
||||
r += fmtprint(f, "OBOL \t^\n");
|
||||
break;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
fmtprprog(Fmt *f, Reprog *reprog)
|
||||
{
|
||||
Reinst *inst;
|
||||
int r;
|
||||
|
||||
r = 0;
|
||||
for(inst = reprog->startinst; inst < reprog->startinst + reprog->len; inst++)
|
||||
r += fmtprinst(f, inst);
|
||||
return r;
|
||||
}
|
||||
|
||||
int
|
||||
reprogfmt(Fmt *f)
|
||||
{
|
||||
Reprog *r;
|
||||
|
||||
r = va_arg(f->args, Reprog*);
|
||||
return fmtprprog(f, r);
|
||||
}
|
|
@ -1,63 +1,66 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include "regexp.h"
|
||||
#include <regexp.h>
|
||||
|
||||
/* substitute into one string using the matches from the last regexec() */
|
||||
extern void
|
||||
regsub(char *sp, /* source string */
|
||||
char *dp, /* destination string */
|
||||
int dlen,
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms) /* number of elements pointed to by mp */
|
||||
void
|
||||
regsub(char *src, char *dst, int dlen, Resub *match, int msize)
|
||||
{
|
||||
char *ssp, *ep;
|
||||
int i;
|
||||
char *ep, c;
|
||||
|
||||
ep = dp+dlen-1;
|
||||
while(*sp != '\0'){
|
||||
if(*sp == '\\'){
|
||||
switch(*++sp){
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
i = *sp-'0';
|
||||
if(mp!=0 && mp[i].sp != 0 && ms>i)
|
||||
for(ssp = mp[i].sp;
|
||||
ssp < mp[i].ep;
|
||||
ssp++)
|
||||
if(dp < ep)
|
||||
*dp++ = *ssp;
|
||||
break;
|
||||
case '\\':
|
||||
if(dp < ep)
|
||||
*dp++ = '\\';
|
||||
break;
|
||||
case '\0':
|
||||
sp--;
|
||||
break;
|
||||
default:
|
||||
if(dp < ep)
|
||||
*dp++ = *sp;
|
||||
break;
|
||||
ep = dst + dlen-1;
|
||||
for(;*src != '\0'; src++) switch(*src) {
|
||||
case '\\':
|
||||
switch(*++src) {
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
i = *src - '0';
|
||||
if(match != nil && i < msize && match[i].ep != nil) {
|
||||
c = *match[i].ep;
|
||||
*match[i].ep = '\0';
|
||||
dst = strecpy(dst, ep+1, match[i].sp);
|
||||
*match[i].ep = c;
|
||||
}
|
||||
}else if(*sp == '&'){
|
||||
if(mp!=0 && mp[0].sp != 0 && ms>0)
|
||||
for(ssp = mp[0].sp;
|
||||
ssp < mp[0].ep; ssp++)
|
||||
if(dp < ep)
|
||||
*dp++ = *ssp;
|
||||
}else{
|
||||
if(dp < ep)
|
||||
*dp++ = *sp;
|
||||
break;
|
||||
case '\\':
|
||||
if(dst < ep)
|
||||
*dst++ = '\\';
|
||||
else
|
||||
goto End;
|
||||
break;
|
||||
case '\0':
|
||||
goto End;
|
||||
default:
|
||||
if(dst < ep)
|
||||
*dst++ = *src;
|
||||
else
|
||||
goto End;
|
||||
break;
|
||||
}
|
||||
sp++;
|
||||
break;
|
||||
case '&':
|
||||
if(match != nil && msize > 0 && match[0].sp != nil) {
|
||||
c = *match[0].ep;
|
||||
*match[0].ep = '\0';
|
||||
dst = strecpy(dst, ep+1, match[0].sp);
|
||||
*match[0].ep = c;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if(dst < ep)
|
||||
*dst++ = *src;
|
||||
else
|
||||
goto End;
|
||||
break;
|
||||
}
|
||||
*dp = '\0';
|
||||
End:
|
||||
*dst = '\0';
|
||||
}
|
||||
|
|
|
@ -1,212 +1,189 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include "regexp.h"
|
||||
#include "regcomp.h"
|
||||
#include <regexp.h>
|
||||
#include "regimpl.h"
|
||||
|
||||
/*
|
||||
* return 0 if no match
|
||||
* >0 if a match
|
||||
* <0 if we ran out of _relist space
|
||||
*/
|
||||
static int
|
||||
rregexec1(Reprog *progp, /* program to run */
|
||||
Rune *bol, /* string to run machine on */
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms, /* number of elements at mp */
|
||||
Reljunk *j)
|
||||
typedef struct RethreadQ RethreadQ;
|
||||
struct RethreadQ
|
||||
{
|
||||
int flag=0;
|
||||
Reinst *inst;
|
||||
Relist *tlp;
|
||||
Rune *s;
|
||||
int i, checkstart;
|
||||
Rune r, *rp, *ep;
|
||||
Relist* tl; /* This list, next list */
|
||||
Relist* nl;
|
||||
Relist* tle; /* ends of this and next list */
|
||||
Relist* nle;
|
||||
int match;
|
||||
Rune *p;
|
||||
Rethread *head;
|
||||
Rethread **tail;
|
||||
};
|
||||
|
||||
match = 0;
|
||||
checkstart = j->startchar;
|
||||
if(mp)
|
||||
for(i=0; i<ms; i++) {
|
||||
mp[i].rsp = 0;
|
||||
mp[i].rep = 0;
|
||||
int
|
||||
rregexec(Reprog *prog, Rune *str, Resub *sem, int msize)
|
||||
{
|
||||
RethreadQ lists[2], *clist, *nlist, *tmp;
|
||||
Rethread *t, *nextthr, **availthr;
|
||||
Reinst *curinst;
|
||||
Rune *rsp, *rep, endr, last;
|
||||
int i, match, first, gen, pri, matchpri;
|
||||
|
||||
if(msize > NSUBEXPM)
|
||||
msize = NSUBEXPM;
|
||||
|
||||
if(prog->startinst->gen != 0) {
|
||||
for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++)
|
||||
curinst->gen = 0;
|
||||
}
|
||||
|
||||
clist = lists;
|
||||
clist->head = nil;
|
||||
clist->tail = &clist->head;
|
||||
nlist = lists + 1;
|
||||
nlist->head = nil;
|
||||
nlist->tail = &nlist->head;
|
||||
|
||||
for(i = 0; i < prog->nthr; i++)
|
||||
prog->thrpool[i] = prog->threads + i;
|
||||
availthr = prog->thrpool + prog->nthr;
|
||||
|
||||
pri = matchpri = gen = match = 0;
|
||||
rsp = str;
|
||||
rep = nil;
|
||||
endr = L'\0';
|
||||
if(sem != nil && msize > 0) {
|
||||
if(sem->rsp != nil)
|
||||
rsp = sem->rsp;
|
||||
if(sem->rep != nil && *sem->rep != L'\0') {
|
||||
rep = sem->rep;
|
||||
endr = *sem->rep;
|
||||
*sem->rep = '\0';
|
||||
}
|
||||
j->relist[0][0].inst = 0;
|
||||
j->relist[1][0].inst = 0;
|
||||
|
||||
/* Execute machine once for each character, including terminal NUL */
|
||||
s = j->rstarts;
|
||||
do{
|
||||
/* fast check for first char */
|
||||
if(checkstart) {
|
||||
switch(j->starttype) {
|
||||
case RUNE:
|
||||
p = runestrchr(s, j->startchar);
|
||||
if(p == 0 || s == j->reol)
|
||||
return match;
|
||||
s = p;
|
||||
break;
|
||||
case BOL:
|
||||
if(s == bol)
|
||||
break;
|
||||
p = runestrchr(s, '\n');
|
||||
if(p == 0 || s == j->reol)
|
||||
return match;
|
||||
s = p+1;
|
||||
}
|
||||
last = 1;
|
||||
for(; last != L'\0'; rsp++) {
|
||||
gen++;
|
||||
last = *rsp;
|
||||
first = 1;
|
||||
t = clist->head;
|
||||
if(t == nil)
|
||||
goto Start;
|
||||
curinst = t->pc;
|
||||
Again:
|
||||
if(curinst->gen == gen)
|
||||
goto Done;
|
||||
curinst->gen = gen;
|
||||
switch(curinst->op) {
|
||||
case ORUNE:
|
||||
if(*rsp != curinst->r)
|
||||
goto Done;
|
||||
case OANY: /* fallthrough */
|
||||
Any:
|
||||
nextthr = t->next;
|
||||
t->pc = curinst + 1;
|
||||
t->next = nil;
|
||||
*nlist->tail = t;
|
||||
nlist->tail = &t->next;
|
||||
if(nextthr == nil)
|
||||
break;
|
||||
t = nextthr;
|
||||
curinst = t->pc;
|
||||
goto Again;
|
||||
case OCLASS:
|
||||
Class:
|
||||
if(*rsp < curinst->r)
|
||||
goto Done;
|
||||
if(*rsp > curinst->r1) {
|
||||
curinst++;
|
||||
goto Class;
|
||||
}
|
||||
}
|
||||
|
||||
r = *s;
|
||||
|
||||
/* switch run lists */
|
||||
tl = j->relist[flag];
|
||||
tle = j->reliste[flag];
|
||||
nl = j->relist[flag^=1];
|
||||
nle = j->reliste[flag];
|
||||
nl->inst = 0;
|
||||
|
||||
/* Add first instruction to current list */
|
||||
_rrenewemptythread(tl, progp->startinst, ms, s);
|
||||
|
||||
/* Execute machine until current list is empty */
|
||||
for(tlp=tl; tlp->inst; tlp++){
|
||||
for(inst=tlp->inst; ; inst = inst->next){
|
||||
switch(inst->type){
|
||||
case RUNE: /* regular character */
|
||||
if(inst->r == r)
|
||||
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case LBRA:
|
||||
tlp->se.m[inst->subid].rsp = s;
|
||||
continue;
|
||||
case RBRA:
|
||||
tlp->se.m[inst->subid].rep = s;
|
||||
continue;
|
||||
case ANY:
|
||||
if(r != '\n')
|
||||
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case ANYNL:
|
||||
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case BOL:
|
||||
if(s == bol || *(s-1) == '\n')
|
||||
continue;
|
||||
break;
|
||||
case EOL:
|
||||
if(s == j->reol || r == 0 || r == '\n')
|
||||
continue;
|
||||
break;
|
||||
case CCLASS:
|
||||
ep = inst->cp->end;
|
||||
for(rp = inst->cp->spans; rp < ep; rp += 2)
|
||||
if(r >= rp[0] && r <= rp[1]){
|
||||
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case NCCLASS:
|
||||
ep = inst->cp->end;
|
||||
for(rp = inst->cp->spans; rp < ep; rp += 2)
|
||||
if(r >= rp[0] && r <= rp[1])
|
||||
break;
|
||||
if(rp == ep)
|
||||
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case OR:
|
||||
/* evaluate right choice later */
|
||||
if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
|
||||
return -1;
|
||||
/* efficiency: advance and re-evaluate */
|
||||
continue;
|
||||
case END: /* Match! */
|
||||
match = 1;
|
||||
tlp->se.m[0].rep = s;
|
||||
if(mp != 0)
|
||||
_renewmatch(mp, ms, &tlp->se);
|
||||
break;
|
||||
nextthr = t->next;
|
||||
t->pc = curinst->a;
|
||||
t->next = nil;
|
||||
*nlist->tail = t;
|
||||
nlist->tail = &t->next;
|
||||
if(nextthr == nil)
|
||||
break;
|
||||
t = nextthr;
|
||||
curinst = t->pc;
|
||||
goto Again;
|
||||
case ONOTNL:
|
||||
if(*rsp != L'\n') {
|
||||
curinst++;
|
||||
goto Again;
|
||||
}
|
||||
goto Done;
|
||||
case OBOL:
|
||||
if(rsp == str || rsp[-1] == L'\n') {
|
||||
curinst++;
|
||||
goto Again;
|
||||
}
|
||||
goto Done;
|
||||
case OEOL:
|
||||
if(*rsp == L'\0' && rep == nil) {
|
||||
curinst++;
|
||||
goto Again;
|
||||
}
|
||||
if(*rsp == '\n')
|
||||
goto Any;
|
||||
goto Done;
|
||||
case OJMP:
|
||||
curinst = curinst->a;
|
||||
goto Again;
|
||||
case OSPLIT:
|
||||
nextthr = *--availthr;
|
||||
nextthr->pc = curinst->b;
|
||||
if(msize > 0)
|
||||
memcpy(nextthr->sem, t->sem, sizeof(Resub)*msize);
|
||||
nextthr->pri = t->pri;
|
||||
nextthr->next = t->next;
|
||||
t->next = nextthr;
|
||||
curinst = curinst->a;
|
||||
goto Again;
|
||||
case OSAVE:
|
||||
if(curinst->sub < msize)
|
||||
t->sem[curinst->sub].rsp = rsp;
|
||||
curinst++;
|
||||
goto Again;
|
||||
case OUNSAVE:
|
||||
if(curinst->sub == 0) {
|
||||
/* "Highest" priority is the left-most longest. */
|
||||
if (t->pri > matchpri)
|
||||
goto Done;
|
||||
match = 1;
|
||||
matchpri = t->pri;
|
||||
if(sem != nil && msize > 0) {
|
||||
memcpy(sem, t->sem, sizeof(Resub)*msize);
|
||||
sem->rep = rsp;
|
||||
}
|
||||
break;
|
||||
goto Done;
|
||||
}
|
||||
if(curinst->sub < msize)
|
||||
t->sem[curinst->sub].rep = rsp;
|
||||
curinst++;
|
||||
goto Again;
|
||||
Done:
|
||||
*availthr++ = t;
|
||||
t = t->next;
|
||||
if(t == nil)
|
||||
break;
|
||||
curinst = t->pc;
|
||||
goto Again;
|
||||
}
|
||||
if(s == j->reol)
|
||||
Start:
|
||||
/* Start again once if we haven't found anything. */
|
||||
if(first == 1 && match == 0) {
|
||||
first = 0;
|
||||
t = *--availthr;
|
||||
if(msize > 0)
|
||||
memset(t->sem, 0, sizeof(Resub)*msize);
|
||||
/* "Lower" priority thread */
|
||||
t->pri = matchpri = pri++;
|
||||
t->next = nil;
|
||||
curinst = prog->startinst;
|
||||
goto Again;
|
||||
}
|
||||
/* If we have a match and no extant threads, we are done. */
|
||||
if(match == 1 && nlist->head == nil)
|
||||
break;
|
||||
checkstart = j->startchar && nl->inst==0;
|
||||
s++;
|
||||
}while(r);
|
||||
tmp = clist;
|
||||
clist = nlist;
|
||||
nlist = tmp;
|
||||
nlist->head = nil;
|
||||
nlist->tail = &nlist->head;
|
||||
}
|
||||
if(rep != nil)
|
||||
*rep = endr;
|
||||
return match;
|
||||
}
|
||||
|
||||
static int
|
||||
rregexec2(Reprog *progp, /* program to run */
|
||||
Rune *bol, /* string to run machine on */
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms, /* number of elements at mp */
|
||||
Reljunk *j
|
||||
)
|
||||
{
|
||||
Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
|
||||
|
||||
/* mark space */
|
||||
j->relist[0] = relist0;
|
||||
j->relist[1] = relist1;
|
||||
j->reliste[0] = relist0 + nelem(relist0) - 2;
|
||||
j->reliste[1] = relist1 + nelem(relist1) - 2;
|
||||
|
||||
return rregexec1(progp, bol, mp, ms, j);
|
||||
}
|
||||
|
||||
extern int
|
||||
rregexec(Reprog *progp, /* program to run */
|
||||
Rune *bol, /* string to run machine on */
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms) /* number of elements at mp */
|
||||
{
|
||||
Reljunk j;
|
||||
Relist relist0[LISTSIZE], relist1[LISTSIZE];
|
||||
int rv;
|
||||
|
||||
/*
|
||||
* use user-specified starting/ending location if specified
|
||||
*/
|
||||
j.rstarts = bol;
|
||||
j.reol = 0;
|
||||
if(mp && ms>0){
|
||||
if(mp->sp)
|
||||
j.rstarts = mp->rsp;
|
||||
if(mp->ep)
|
||||
j.reol = mp->rep;
|
||||
}
|
||||
j.starttype = 0;
|
||||
j.startchar = 0;
|
||||
if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
|
||||
j.starttype = RUNE;
|
||||
j.startchar = progp->startinst->r;
|
||||
}
|
||||
if(progp->startinst->type == BOL)
|
||||
j.starttype = BOL;
|
||||
|
||||
/* mark space */
|
||||
j.relist[0] = relist0;
|
||||
j.relist[1] = relist1;
|
||||
j.reliste[0] = relist0 + nelem(relist0) - 2;
|
||||
j.reliste[1] = relist1 + nelem(relist1) - 2;
|
||||
|
||||
rv = rregexec1(progp, bol, mp, ms, &j);
|
||||
if(rv >= 0)
|
||||
return rv;
|
||||
rv = rregexec2(progp, bol, mp, ms, &j);
|
||||
if(rv >= 0)
|
||||
return rv;
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -1,64 +1,66 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include "regexp.h"
|
||||
#include <regexp.h>
|
||||
|
||||
/* substitute into one string using the matches from the last regexec() */
|
||||
extern void
|
||||
rregsub(Rune *sp, /* source string */
|
||||
Rune *dp, /* destination string */
|
||||
int dlen,
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms) /* number of elements pointed to by mp */
|
||||
void
|
||||
rregsub(Rune *src, Rune *dst, int dlen, Resub *match, int msize)
|
||||
{
|
||||
Rune *ssp, *ep;
|
||||
int i;
|
||||
Rune *ep, r;
|
||||
|
||||
ep = dp+(dlen/sizeof(Rune))-1;
|
||||
while(*sp != '\0'){
|
||||
if(*sp == '\\'){
|
||||
switch(*++sp){
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
i = *sp-'0';
|
||||
if(mp[i].rsp != 0 && mp!=0 && ms>i)
|
||||
for(ssp = mp[i].rsp;
|
||||
ssp < mp[i].rep;
|
||||
ssp++)
|
||||
if(dp < ep)
|
||||
*dp++ = *ssp;
|
||||
break;
|
||||
case '\\':
|
||||
if(dp < ep)
|
||||
*dp++ = '\\';
|
||||
break;
|
||||
case '\0':
|
||||
sp--;
|
||||
break;
|
||||
default:
|
||||
if(dp < ep)
|
||||
*dp++ = *sp;
|
||||
break;
|
||||
ep = dst + dlen-1;
|
||||
for(;*src != L'\0'; src++) switch(*src) {
|
||||
case L'\\':
|
||||
switch(*++src) {
|
||||
case L'0':
|
||||
case L'1':
|
||||
case L'2':
|
||||
case L'3':
|
||||
case L'4':
|
||||
case L'5':
|
||||
case L'6':
|
||||
case L'7':
|
||||
case L'8':
|
||||
case L'9':
|
||||
i = *src - L'0';
|
||||
if(match != nil && i < msize && match[i].rsp != nil) {
|
||||
r = *match[i].rep;
|
||||
*match[i].rep = L'\0';
|
||||
dst = runestrecpy(dst, ep+1, match[i].rsp);
|
||||
*match[i].rep = r;
|
||||
}
|
||||
}else if(*sp == '&'){
|
||||
if(mp[0].rsp != 0 && mp!=0 && ms>0)
|
||||
if(mp[0].rsp != 0)
|
||||
for(ssp = mp[0].rsp;
|
||||
ssp < mp[0].rep; ssp++)
|
||||
if(dp < ep)
|
||||
*dp++ = *ssp;
|
||||
}else{
|
||||
if(dp < ep)
|
||||
*dp++ = *sp;
|
||||
break;
|
||||
case L'\\':
|
||||
if(dst < ep)
|
||||
*dst++ = L'\\';
|
||||
else
|
||||
goto End;
|
||||
break;
|
||||
case L'\0':
|
||||
goto End;
|
||||
default:
|
||||
if(dst < ep)
|
||||
*dst++ = *src;
|
||||
else
|
||||
goto End;
|
||||
break;
|
||||
}
|
||||
sp++;
|
||||
break;
|
||||
case L'&':
|
||||
if(match != nil && msize > 0 && match[0].rsp != nil) {
|
||||
r = *match[0].rep;
|
||||
*match[0].rep = L'\0';
|
||||
dst = runestrecpy(dst, ep+1, match[0].rsp);
|
||||
*match[0].rep = r;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if(dst < ep)
|
||||
*dst++ = *src;
|
||||
else
|
||||
goto End;
|
||||
break;
|
||||
}
|
||||
*dp = '\0';
|
||||
End:
|
||||
*dst = L'\0';
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue