New libregexp and APE ported to native

This commit is contained in:
ben 2016-04-26 22:23:44 -05:00
parent 651d6c2bc6
commit 0a460e1722
29 changed files with 1861 additions and 1677 deletions

View file

@ -1,15 +1,29 @@
#pragma src "/sys/src/libregexp" #pragma src "/sys/src/libregexp"
#pragma lib "libregexp.a" #pragma lib "libregexp.a"
enum
{
OANY = 0,
OBOL,
OCLASS,
OEOL,
OJMP,
ONOTNL,
ORUNE,
OSAVE,
OSPLIT,
OUNSAVE,
};
typedef struct Resub Resub; typedef struct Resub Resub;
typedef struct Reclass Reclass; typedef struct Reinst Reinst;
typedef struct Reinst Reinst; typedef struct Reprog Reprog;
typedef struct Reprog Reprog; typedef struct Rethread Rethread;
/* #pragma incomplete Reinst
* Sub expression matches #pragma incomplete Rethread
*/
struct Resub{ struct Resub
{
union union
{ {
char *sp; char *sp;
@ -21,46 +35,22 @@ struct Resub{
Rune *rep; Rune *rep;
}; };
}; };
struct Reprog
/* {
* character class, each pair of rune's defines a range Reinst *startinst;
*/ Rethread *threads;
struct Reclass{ Rethread **thrpool;
Rune *end; char *regstr;
Rune spans[64]; int len;
int nthr;
}; };
/* Reprog* regcomp(char*);
* Machine instructions Reprog* regcomplit(char*);
*/ Reprog* regcompnl(char*);
struct Reinst{ void regerror(char*);
int type; int regexec(Reprog*, char*, Resub*, int);
union { void regsub(char*, char*, int, Resub*, int);
Reclass *cp; /* class pointer */ int rregexec(Reprog*, Rune*, Resub*, int);
Rune r; /* character */ void rregsub(Rune*, Rune*, int, Resub*, int);
int subid; /* sub-expression id for RBRA and LBRA */ int reprogfmt(Fmt *);
Reinst *right; /* right child of OR */
};
union { /* regexp relies on these two being in the same union */
Reinst *left; /* left child of OR */
Reinst *next; /* next instruction for CAT & LBRA */
};
};
/*
* Reprogram definition
*/
struct Reprog{
Reinst *startinst; /* start pc */
Reclass class[16]; /* .data */
Reinst firstinst[5]; /* .text */
};
extern Reprog *regcomp(char*);
extern Reprog *regcomplit(char*);
extern Reprog *regcompnl(char*);
extern void regerror(char*);
extern int regexec(Reprog*, char*, Resub*, int);
extern void regsub(char*, char*, int, Resub*, int);
extern int rregexec(Reprog*, Rune*, Resub*, int);
extern void rregsub(Rune*, Rune*, int, Resub*, int);

View file

@ -6,20 +6,20 @@ Copyright (c) Lucent Technologies 1997
typedef double Awkfloat; typedef double Awkfloat;
/* unsigned char is more trouble than it's worth */ #define xfree(a) { if ((a) != nil) { free((a)); (a) = nil; } }
typedef unsigned char uschar;
#define xfree(a) { if ((a) != NULL) { free((char *) a); a = NULL; } }
#define DEBUG #define DEBUG
#ifdef DEBUG #ifdef DEBUG
/* uses have to be doubly parenthesized */ /* uses have to be doubly parenthesized */
# define dprintf(x) if (dbg) printf x # define dprint(x) if (dbg) print x
#else #else
# define dprintf(x) # define dprint(x)
#endif #endif
#define FOPEN_MAX 40 /* max number of open files */
#define EOF -1
extern char errbuf[]; extern char errbuf[];
extern int compile_time; /* 1 if compiling, 0 if running */ extern int compile_time; /* 1 if compiling, 0 if running */
@ -28,6 +28,10 @@ extern int safe; /* 0 => unsafe, 1 => safe */
#define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */ #define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */
extern int recsize; /* size of current record, orig RECSIZE */ extern int recsize; /* size of current record, orig RECSIZE */
extern Biobuf stdin;
extern Biobuf stdout;
extern Biobuf stderr;
extern char **FS; extern char **FS;
extern char **RS; extern char **RS;
extern char **ORS; extern char **ORS;
@ -56,8 +60,8 @@ extern int patlen; /* length of pattern matched. set in b.c */
/* Cell: all information about a variable or constant */ /* Cell: all information about a variable or constant */
typedef struct Cell { typedef struct Cell {
uschar ctype; /* OCELL, OBOOL, OJUMP, etc. */ uchar ctype; /* OCELL, OBOOL, OJUMP, etc. */
uschar csub; /* CCON, CTEMP, CFLD, etc. */ uchar csub; /* CCON, CTEMP, CFLD, etc. */
char *nval; /* name, for variables only */ char *nval; /* name, for variables only */
char *sval; /* string value */ char *sval; /* string value */
Awkfloat fval; /* value as number */ Awkfloat fval; /* value as number */
@ -66,7 +70,7 @@ typedef struct Cell {
} Cell; } Cell;
typedef struct Array { /* symbol table array */ typedef struct Array { /* symbol table array */
int nelem; /* elements in table right now */ int nelemt; /* elements in table right now */
int size; /* size of tab */ int size; /* size of tab */
Cell **tab; /* hash table pointers */ Cell **tab; /* hash table pointers */
} Array; } Array;

View file

@ -23,8 +23,9 @@ THIS SOFTWARE.
****************************************************************/ ****************************************************************/
%{ %{
#include <stdio.h> #include <u.h>
#include <string.h> #include <libc.h>
#include <bio.h>
#include "awk.h" #include "awk.h"
#define makedfa(a,b) compre(a) #define makedfa(a,b) compre(a)

View file

@ -22,10 +22,10 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE. THIS SOFTWARE.
****************************************************************/ ****************************************************************/
#include <stdio.h> #include <u.h>
#include <stdlib.h> #include <libc.h>
#include <string.h>
#include <ctype.h> #include <ctype.h>
#include <bio.h>
#include "awk.h" #include "awk.h"
#include "y.tab.h" #include "y.tab.h"
@ -90,9 +90,8 @@ Keyword keywords[] ={ /* keep sorted: binary searched */
{ "while", WHILE, WHILE }, { "while", WHILE, WHILE },
}; };
#define DEBUG
#ifdef DEBUG #ifdef DEBUG
#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); } #define RET(x) { if(dbg)print("lex %s\n", tokname(x)); return(x); }
#else #else
#define RET(x) return(x) #define RET(x) return(x)
#endif #endif
@ -170,7 +169,7 @@ int yylex(void)
static char *buf = 0; static char *buf = 0;
static int bufsize = 500; static int bufsize = 500;
if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL) if (buf == 0 && (buf = (char *) malloc(bufsize)) == nil)
FATAL( "out of space in yylex" ); FATAL( "out of space in yylex" );
if (sc) { if (sc) {
sc = 0; sc = 0;
@ -353,7 +352,7 @@ int string(void)
static char *buf = 0; static char *buf = 0;
static int bufsz = 500; static int bufsz = 500;
if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) if (buf == 0 && (buf = (char *) malloc(bufsz)) == nil)
FATAL("out of space for strings"); FATAL("out of space for strings");
for (bp = buf; (c = input()) != '"'; ) { for (bp = buf; (c = input()) != '"'; ) {
if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0)) if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0))
@ -401,7 +400,7 @@ int string(void)
} }
*px = 0; *px = 0;
unput(c); unput(c);
sscanf(xbuf, "%x", &n); n = strtol(xbuf, nil, 16);
*bp++ = n; *bp++ = n;
break; break;
} }
@ -497,7 +496,7 @@ int regexpr(void)
static int bufsz = 500; static int bufsz = 500;
char *bp; char *bp;
if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) if (buf == 0 && (buf = (char *) malloc(bufsz)) == nil)
FATAL("out of space for rex expr"); FATAL("out of space for rex expr");
bp = buf; bp = buf;
for ( ; (c = input()) != '/' && c != 0; ) { for ( ; (c = input()) != '/' && c != 0; ) {
@ -526,7 +525,7 @@ char ebuf[300];
char *ep = ebuf; char *ep = ebuf;
char yysbuf[100]; /* pushback buffer */ char yysbuf[100]; /* pushback buffer */
char *yysptr = yysbuf; char *yysptr = yysbuf;
FILE *yyin = 0; Biobuf *yyin;
int input(void) /* get next lexical input character */ int input(void) /* get next lexical input character */
{ {
@ -535,7 +534,7 @@ int input(void) /* get next lexical input character */
if (yysptr > yysbuf) if (yysptr > yysbuf)
c = *--yysptr; c = *--yysptr;
else if (lexprog != NULL) { /* awk '...' */ else if (lexprog != nil) { /* awk '...' */
if ((c = *lexprog) != 0) if ((c = *lexprog) != 0)
lexprog++; lexprog++;
} else /* awk -f ... */ } else /* awk -f ... */

View file

@ -22,17 +22,14 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE. THIS SOFTWARE.
****************************************************************/ ****************************************************************/
#define DEBUG #include <u.h>
#include <stdio.h> #include <libc.h>
#include <string.h>
#include <ctype.h> #include <ctype.h>
#include <errno.h> #include <bio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "awk.h" #include "awk.h"
#include "y.tab.h" #include "y.tab.h"
FILE *infile = NULL; Biobuf *infile;
char *file = ""; char *file = "";
char *record; char *record;
int recsize = RECSIZE; int recsize = RECSIZE;
@ -50,17 +47,17 @@ int donerec; /* 1 = record is valid (no flds have changed) */
int lastfld = 0; /* last used field */ int lastfld = 0; /* last used field */
int argno = 1; /* current input argument number */ int argno = 1; /* current input argument number */
extern Awkfloat *ARGC; extern Awkfloat *AARGC;
static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE }; static Cell dollar0 = { OCELL, CFLD, nil, "", 0.0, REC|STR|DONTFREE };
static Cell dollar1 = { OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE }; static Cell dollar1 = { OCELL, CFLD, nil, "", 0.0, FLD|STR|DONTFREE };
void recinit(unsigned int n) void recinit(unsigned int n)
{ {
record = (char *) malloc(n); record = (char *) malloc(n);
fields = (char *) malloc(n); fields = (char *) malloc(n);
fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *)); fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *));
if (record == NULL || fields == NULL || fldtab == NULL) if (record == nil || fields == nil || fldtab == nil)
FATAL("out of space for $0 and fields"); FATAL("out of space for $0 and fields");
fldtab[0] = (Cell *) malloc(sizeof (Cell)); fldtab[0] = (Cell *) malloc(sizeof (Cell));
*fldtab[0] = dollar0; *fldtab[0] = dollar0;
@ -76,10 +73,10 @@ void makefields(int n1, int n2) /* create $n1..$n2 inclusive */
for (i = n1; i <= n2; i++) { for (i = n1; i <= n2; i++) {
fldtab[i] = (Cell *) malloc(sizeof (struct Cell)); fldtab[i] = (Cell *) malloc(sizeof (struct Cell));
if (fldtab[i] == NULL) if (fldtab[i] == nil)
FATAL("out of space in makefields %d", i); FATAL("out of space in makefields %d", i);
*fldtab[i] = dollar1; *fldtab[i] = dollar1;
sprintf(temp, "%d", i); sprint(temp, "%d", i);
fldtab[i]->nval = tostring(temp); fldtab[i]->nval = tostring(temp);
} }
} }
@ -89,7 +86,7 @@ void initgetrec(void)
int i; int i;
char *p; char *p;
for (i = 1; i < *ARGC; i++) { for (i = 1; i < *AARGC; i++) {
if (!isclvar(p = getargv(i))) { /* find 1st real filename */ if (!isclvar(p = getargv(i))) { /* find 1st real filename */
setsval(lookup("FILENAME", symtab), getargv(i)); setsval(lookup("FILENAME", symtab), getargv(i));
return; return;
@ -97,7 +94,7 @@ void initgetrec(void)
setclvar(p); /* a commandline assignment before filename */ setclvar(p); /* a commandline assignment before filename */
argno++; argno++;
} }
infile = stdin; /* no filenames, so use stdin */ infile = &stdin; /* no filenames, so use &stdin */
} }
int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
@ -111,16 +108,16 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
firsttime = 0; firsttime = 0;
initgetrec(); initgetrec();
} }
dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n", dprint( ("RS=<%s>, FS=<%s>, AARGC=%g, FILENAME=%s\n",
*RS, *FS, *ARGC, *FILENAME) ); *RS, *FS, *AARGC, *FILENAME) );
if (isrecord) { if (isrecord) {
donefld = 0; donefld = 0;
donerec = 1; donerec = 1;
} }
buf[0] = 0; buf[0] = 0;
while (argno < *ARGC || infile == stdin) { while (argno < *AARGC || infile == &stdin) {
dprintf( ("argno=%d, file=|%s|\n", argno, file) ); dprint( ("argno=%d, file=|%s|\n", argno, file) );
if (infile == NULL) { /* have to open a new file */ if (infile == nil) { /* have to open a new file */
file = getargv(argno); file = getargv(argno);
if (*file == '\0') { /* it's been zapped */ if (*file == '\0') { /* it's been zapped */
argno++; argno++;
@ -132,10 +129,10 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
continue; continue;
} }
*FILENAME = file; *FILENAME = file;
dprintf( ("opening file %s\n", file) ); dprint( ("opening file %s\n", file) );
if (*file == '-' && *(file+1) == '\0') if (*file == '-' && *(file+1) == '\0')
infile = stdin; infile = &stdin;
else if ((infile = fopen(file, "r")) == NULL) else if ((infile = Bopen(file, OREAD)) == nil)
FATAL("can't open file %s", file); FATAL("can't open file %s", file);
setfval(fnrloc, 0.0); setfval(fnrloc, 0.0);
} }
@ -158,9 +155,9 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
return 1; return 1;
} }
/* EOF arrived on this file; set up next */ /* EOF arrived on this file; set up next */
if (infile != stdin) if (infile != &stdin)
fclose(infile); Bterm(infile);
infile = NULL; infile = nil;
argno++; argno++;
} }
*pbuf = buf; *pbuf = buf;
@ -170,13 +167,13 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
void nextfile(void) void nextfile(void)
{ {
if (infile != stdin) if (infile != &stdin)
fclose(infile); Bterm(infile);
infile = NULL; infile = nil;
argno++; argno++;
} }
int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */ int readrec(char **pbuf, int *pbufsize, Biobuf *inf) /* read one record into buf */
{ {
int sep, c; int sep, c;
char *rr, *buf = *pbuf; char *rr, *buf = *pbuf;
@ -187,13 +184,13 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
strcpy(inputFS, *FS); /* for subsequent field splitting */ strcpy(inputFS, *FS); /* for subsequent field splitting */
if ((sep = **RS) == 0) { if ((sep = **RS) == 0) {
sep = '\n'; sep = '\n';
while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ while ((c=Bgetc(inf)) == '\n' && c != EOF) /* skip leading \n's */
; ;
if (c != EOF) if (c != EOF)
ungetc(c, inf); Bungetc(inf);
} }
for (rr = buf; ; ) { for (rr = buf; ; ) {
for (; (c=getc(inf)) != sep && c != EOF; ) { for (; (c=Bgetc(inf)) != sep && c != EOF; ) {
if (rr-buf+1 > bufsize) if (rr-buf+1 > bufsize)
if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1")) if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1"))
FATAL("input record `%.30s...' too long", buf); FATAL("input record `%.30s...' too long", buf);
@ -201,7 +198,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
} }
if (**RS == sep || c == EOF) if (**RS == sep || c == EOF)
break; break;
if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ if ((c = Bgetc(inf)) == '\n' || c == EOF) /* 2 in a row */
break; break;
if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2")) if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2"))
FATAL("input record `%.30s...' too long", buf); FATAL("input record `%.30s...' too long", buf);
@ -211,7 +208,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3")) if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
FATAL("input record `%.30s...' too long", buf); FATAL("input record `%.30s...' too long", buf);
*rr = 0; *rr = 0;
dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) ); dprint( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
*pbuf = buf; *pbuf = buf;
*pbufsize = bufsize; *pbufsize = bufsize;
return c == EOF && rr == buf ? 0 : 1; return c == EOF && rr == buf ? 0 : 1;
@ -223,10 +220,10 @@ char *getargv(int n) /* get ARGV[n] */
char *s, temp[50]; char *s, temp[50];
extern Array *ARGVtab; extern Array *ARGVtab;
sprintf(temp, "%d", n); sprint(temp, "%d", n);
x = setsymtab(temp, "", 0.0, STR, ARGVtab); x = setsymtab(temp, "", 0.0, STR, ARGVtab);
s = getsval(x); s = getsval(x);
dprintf( ("getargv(%d) returns |%s|\n", n, s) ); dprint( ("getargv(%d) returns |%s|\n", n, s) );
return s; return s;
} }
@ -245,7 +242,7 @@ void setclvar(char *s) /* set var=value from s */
q->fval = atof(q->sval); q->fval = atof(q->sval);
q->tval |= NUM; q->tval |= NUM;
} }
dprintf( ("command line set %s to |%s|\n", s, p) ); dprint( ("command line set %s to |%s|\n", s, p) );
} }
@ -265,7 +262,7 @@ void fldbld(void) /* create fields from current record */
n = strlen(r); n = strlen(r);
if (n > fieldssize) { if (n > fieldssize) {
xfree(fields); xfree(fields);
if ((fields = (char *) malloc(n+1)) == NULL) if ((fields = (char *) malloc(n+1)) == nil)
FATAL("out of space for fields in fldbld %d", n); FATAL("out of space for fields in fldbld %d", n);
fieldssize = n; fieldssize = n;
} }
@ -273,7 +270,7 @@ void fldbld(void) /* create fields from current record */
i = 0; /* number of fields accumulated here */ i = 0; /* number of fields accumulated here */
if (strlen(inputFS) > 1) { /* it's a regular expression */ if (strlen(inputFS) > 1) { /* it's a regular expression */
i = refldbld(r, inputFS); i = refldbld(r, inputFS);
} else if ((sep = *inputFS) == ' ') { /* default whitespace */ } else if (*inputFS == ' ') { /* default whitespace */
for (i = 0; ; ) { for (i = 0; ; ) {
while (*r == ' ' || *r == '\t' || *r == '\n') while (*r == ' ' || *r == '\t' || *r == '\n')
r++; r++;
@ -339,7 +336,7 @@ void fldbld(void) /* create fields from current record */
if (dbg) { if (dbg) {
for (j = 0; j <= lastfld; j++) { for (j = 0; j <= lastfld; j++) {
p = fldtab[j]; p = fldtab[j];
printf("field %d (%s): |%s|\n", j, p->nval, p->sval); print("field %d (%s): |%s|\n", j, p->nval, p->sval);
} }
} }
} }
@ -383,7 +380,7 @@ void growfldtab(int n) /* make new fields up to at least $n */
if (n > nf) if (n > nf)
nf = n; nf = n;
fldtab = (Cell **) realloc(fldtab, (nf+1) * (sizeof (struct Cell *))); fldtab = (Cell **) realloc(fldtab, (nf+1) * (sizeof (struct Cell *)));
if (fldtab == NULL) if (fldtab == nil)
FATAL("out of space creating %d fields", nf); FATAL("out of space creating %d fields", nf);
makefields(nfields+1, nf); makefields(nfields+1, nf);
nfields = nf; nfields = nf;
@ -395,12 +392,12 @@ int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */
/* the fields are all stored in this one array with \0's */ /* the fields are all stored in this one array with \0's */
char *fr; char *fr;
void *p; void *p;
int i, tempstat, n; int i, n;
n = strlen(rec); n = strlen(rec);
if (n > fieldssize) { if (n > fieldssize) {
xfree(fields); xfree(fields);
if ((fields = (char *) malloc(n+1)) == NULL) if ((fields = (char *) malloc(n+1)) == nil)
FATAL("out of space for fields in refldbld %d", n); FATAL("out of space for fields in refldbld %d", n);
fieldssize = n; fieldssize = n;
} }
@ -409,7 +406,7 @@ int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */
if (*rec == '\0') if (*rec == '\0')
return 0; return 0;
p = compre(fs); p = compre(fs);
dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) ); dprint( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
for (i = 1; ; i++) { for (i = 1; ; i++) {
if (i > nfields) if (i > nfields)
growfldtab(i); growfldtab(i);
@ -417,15 +414,15 @@ int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */
xfree(fldtab[i]->sval); xfree(fldtab[i]->sval);
fldtab[i]->tval = FLD | STR | DONTFREE; fldtab[i]->tval = FLD | STR | DONTFREE;
fldtab[i]->sval = fr; fldtab[i]->sval = fr;
dprintf( ("refldbld: i=%d\n", i) ); dprint( ("refldbld: i=%d\n", i) );
if (nematch(p, rec, rec)) { if (nematch(p, rec, rec)) {
dprintf( ("match %s (%d chars)\n", patbeg, patlen) ); dprint( ("match %s (%d chars)\n", patbeg, patlen) );
strncpy(fr, rec, patbeg-rec); strncpy(fr, rec, patbeg-rec);
fr += patbeg - rec + 1; fr += patbeg - rec + 1;
*(fr-1) = '\0'; *(fr-1) = '\0';
rec = patbeg + patlen; rec = patbeg + patlen;
} else { } else {
dprintf( ("no match %s\n", rec) ); dprint( ("no match %s\n", rec) );
strcpy(fr, rec); strcpy(fr, rec);
break; break;
} }
@ -457,15 +454,15 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3")) if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
FATAL("built giant record `%.30s...'", record); FATAL("built giant record `%.30s...'", record);
*r = '\0'; *r = '\0';
dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) ); dprint( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
if (freeable(fldtab[0])) if (freeable(fldtab[0]))
xfree(fldtab[0]->sval); xfree(fldtab[0]->sval);
fldtab[0]->tval = REC | STR | DONTFREE; fldtab[0]->tval = REC | STR | DONTFREE;
fldtab[0]->sval = record; fldtab[0]->sval = record;
dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) ); dprint( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
dprintf( ("recbld = |%s|\n", record) ); dprint( ("recbld = |%s|\n", record) );
donerec = 1; donerec = 1;
} }
@ -484,24 +481,26 @@ void SYNTAX(char *fmt, ...)
if (been_here++ > 2) if (been_here++ > 2)
return; return;
fprintf(stderr, "%s: ", cmdname); Bprint(&stderr, "%s: ", cmdname);
va_start(varg, fmt); va_start(varg, fmt);
vfprintf(stderr, fmt, varg); Bvprint(&stderr, fmt, varg);
va_end(varg); va_end(varg);
if(compile_time == 1 && cursource() != NULL) if(compile_time == 1 && cursource() != nil)
fprintf(stderr, " at %s:%d", cursource(), lineno); Bprint(&stderr, " at %s:%d", cursource(), lineno);
else else
fprintf(stderr, " at line %d", lineno); Bprint(&stderr, " at line %d", lineno);
if (curfname != NULL) if (curfname != nil)
fprintf(stderr, " in function %s", curfname); Bprint(&stderr, " in function %s", curfname);
fprintf(stderr, "\n"); Bprint(&stderr, "\n");
errorflag = 2; errorflag = 2;
eprint(); eprint();
} }
void fpecatch(int n) int handler(void *, char *err)
{ {
FATAL("floating point exception %d", n); Bflush(&stdout);
fprint(2, "%s\n", err);
return 0;
} }
extern int bracecnt, brackcnt, parencnt; extern int bracecnt, brackcnt, parencnt;
@ -520,16 +519,16 @@ void bracecheck(void)
bcheck2(parencnt, '(', ')'); bcheck2(parencnt, '(', ')');
} }
void bcheck2(int n, int c1, int c2) void bcheck2(int n, int, int c2)
{ {
if (n == 1) if (n == 1)
fprintf(stderr, "\tmissing %c\n", c2); Bprint(&stderr, "\tmissing %c\n", c2);
else if (n > 1) else if (n > 1)
fprintf(stderr, "\t%d missing %c's\n", n, c2); Bprint(&stderr, "\t%d missing %c's\n", n, c2);
else if (n == -1) else if (n == -1)
fprintf(stderr, "\textra %c\n", c2); Bprint(&stderr, "\textra %c\n", c2);
else if (n < -1) else if (n < -1)
fprintf(stderr, "\t%d extra %c's\n", -n, c2); Bprint(&stderr, "\t%d extra %c's\n", -n, c2);
} }
void FATAL(char *fmt, ...) void FATAL(char *fmt, ...)
@ -537,15 +536,15 @@ void FATAL(char *fmt, ...)
extern char *cmdname; extern char *cmdname;
va_list varg; va_list varg;
fflush(stdout); Bflush(&stdout);
fprintf(stderr, "%s: ", cmdname); Bprint(&stderr, "%s: ", cmdname);
va_start(varg, fmt); va_start(varg, fmt);
vfprintf(stderr, fmt, varg); Bvprint(&stderr, fmt, varg);
va_end(varg); va_end(varg);
error(); error();
if (dbg > 1) /* core dump if serious debugging on */ if (dbg > 1) /* core dump if serious debugging on */
abort(); abort();
exit(2); exits("FATAL");
} }
void WARNING(char *fmt, ...) void WARNING(char *fmt, ...)
@ -553,10 +552,10 @@ void WARNING(char *fmt, ...)
extern char *cmdname; extern char *cmdname;
va_list varg; va_list varg;
fflush(stdout); Bflush(&stdout);
fprintf(stderr, "%s: ", cmdname); Bprint(&stderr, "%s: ", cmdname);
va_start(varg, fmt); va_start(varg, fmt);
vfprintf(stderr, fmt, varg); Bvprint(&stderr, fmt, varg);
va_end(varg); va_end(varg);
error(); error();
} }
@ -566,13 +565,13 @@ void error()
extern Node *curnode; extern Node *curnode;
int line; int line;
fprintf(stderr, "\n"); Bprint(&stderr, "\n");
if (compile_time != 2 && NR && *NR > 0) { if (compile_time != 2 && NR && *NR > 0) {
if (strcmp(*FILENAME, "-") != 0) if (strcmp(*FILENAME, "-") != 0)
fprintf(stderr, " input record %s:%d", *FILENAME, (int) (*FNR)); Bprint(&stderr, " input record %s:%d", *FILENAME, (int) (*FNR));
else else
fprintf(stderr, " input record number %d", (int) (*FNR)); Bprint(&stderr, " input record number %d", (int) (*FNR));
fprintf(stderr, "\n"); Bprint(&stderr, "\n");
} }
if (compile_time != 2 && curnode) if (compile_time != 2 && curnode)
line = curnode->lineno; line = curnode->lineno;
@ -580,14 +579,14 @@ void error()
line = lineno; line = lineno;
else else
line = -1; line = -1;
if (compile_time == 1 && cursource() != NULL){ if (compile_time == 1 && cursource() != nil){
if(line >= 0) if(line >= 0)
fprintf(stderr, " source %s:%d", cursource(), line); Bprint(&stderr, " source %s:%d", cursource(), line);
else else
fprintf(stderr, " source file %s", cursource()); Bprint(&stderr, " source file %s", cursource());
}else if(line >= 0) }else if(line >= 0)
fprintf(stderr, " source line %d", line); Bprint(&stderr, " source line %d", line);
fprintf(stderr, "\n"); Bprint(&stderr, "\n");
eprint(); eprint();
} }
@ -607,23 +606,23 @@ void eprint(void) /* try to print context around error */
; ;
while (*p == '\n') while (*p == '\n')
p++; p++;
fprintf(stderr, " context is\n\t"); Bprint(&stderr, " context is\n\t");
for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--) for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
; ;
for ( ; p < q; p++) for ( ; p < q; p++)
if (*p) if (*p)
putc(*p, stderr); Bputc(&stderr, *p);
fprintf(stderr, " >>> "); Bprint(&stderr, " >>> ");
for ( ; p < ep; p++) for ( ; p < ep; p++)
if (*p) if (*p)
putc(*p, stderr); Bputc(&stderr, *p);
fprintf(stderr, " <<< "); Bprint(&stderr, " <<< ");
if (*ep) if (*ep)
while ((c = input()) != '\n' && c != '\0' && c != EOF) { while ((c = input()) != '\n' && c != '\0' && c != EOF) {
putc(c, stderr); Bputc(&stderr, c);
bclass(c); bclass(c);
} }
putc('\n', stderr); Bputc(&stderr, '\n');
ep = ebuf; ep = ebuf;
} }
@ -642,12 +641,10 @@ void bclass(int c)
double errcheck(double x, char *s) double errcheck(double x, char *s)
{ {
if (errno == EDOM) { if (isNaN(x)) {
errno = 0;
WARNING("%s argument out of domain", s); WARNING("%s argument out of domain", s);
x = 1; x = 1;
} else if (errno == ERANGE) { } else if (isInf(x, 1) || isInf(x, -1)) {
errno = 0;
WARNING("%s result out of range", s); WARNING("%s result out of range", s);
x = 1; x = 1;
} }
@ -668,7 +665,6 @@ int isclvar(char *s) /* is s of form var=something ? */
/* strtod is supposed to be a proper test of what's a valid number */ /* strtod is supposed to be a proper test of what's a valid number */
#include <math.h>
int is_number(char *s) int is_number(char *s)
{ {
double r; double r;
@ -699,9 +695,8 @@ int is_number(char *s)
return 0; /* can't be a number */ return 0; /* can't be a number */
} }
errno = 0;
r = strtod(s, &ep); r = strtod(s, &ep);
if (ep == s || r == HUGE_VAL || errno == ERANGE) if (ep == s || isInf(r, 1) || isInf(r, -1))
return 0; return 0;
while (*ep == ' ' || *ep == '\t' || *ep == '\n') while (*ep == ' ' || *ep == '\t' || *ep == '\n')
ep++; ep++;

View file

@ -24,21 +24,21 @@ THIS SOFTWARE.
char *version = "version 19990602"; char *version = "version 19990602";
#define DEBUG #include <u.h>
#include <stdio.h> #include <libc.h>
#include <ctype.h> #include <bio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include "awk.h" #include "awk.h"
#include "y.tab.h" #include "y.tab.h"
extern char **environ;
extern int nfields; extern int nfields;
Biobuf stdin;
Biobuf stdout;
Biobuf stderr;
int dbg = 0; int dbg = 0;
char *cmdname; /* gets argv[0] for error messages */ char *cmdname; /* gets argv[0] for error messages */
extern FILE *yyin; /* lex input file */ extern Biobuf *yyin; /* lex input file */
char *lexprog; /* points to program argument if it exists */ char *lexprog; /* points to program argument if it exists */
extern int errorflag; /* non-zero if any syntax errors; set by yyerror */ extern int errorflag; /* non-zero if any syntax errors; set by yyerror */
int compile_time = 2; /* for error printing: */ int compile_time = 2; /* for error printing: */
@ -50,18 +50,23 @@ int curpfile = 0; /* current filename */
int safe = 0; /* 1 => "safe" mode */ int safe = 0; /* 1 => "safe" mode */
int main(int argc, char *argv[]) void main(int argc, char *argv[])
{ {
char *fs = NULL, *marg; char *fs = nil, *marg;
int temp; int temp;
Binit(&stdin, 0, OREAD);
Binit(&stdout, 1, OWRITE);
Binit(&stderr, 2, OWRITE);
cmdname = argv[0]; cmdname = argv[0];
if (argc == 1) { if (argc == 1) {
fprintf(stderr, "Usage: %s [-F fieldsep] [-mf n] [-mr n] [-v var=value] [-f programfile | 'program'] [file ...]\n", cmdname); Bprint(&stderr, "Usage: %s [-F fieldsep] [-mf n] [-mr n] [-v var=value] [-f programfile | 'program'] [file ...]\n", cmdname);
exit(1); exits("usage");
} }
signal(SIGFPE, fpecatch);
yyin = NULL; atnotify(handler, 1);
yyin = nil;
symtab = makesymtab(NSYMTAB); symtab = makesymtab(NSYMTAB);
while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') { while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
if (strcmp(argv[1], "--") == 0) { /* explicit end of args */ if (strcmp(argv[1], "--") == 0) { /* explicit end of args */
@ -94,7 +99,7 @@ int main(int argc, char *argv[])
else if (argc > 1 && argv[1][0] != 0) else if (argc > 1 && argv[1][0] != 0)
fs = &argv[1][0]; fs = &argv[1][0];
} }
if (fs == NULL || *fs == '\0') if (fs == nil || *fs == '\0')
WARNING("field separator FS is empty"); WARNING("field separator FS is empty");
break; break;
case 'v': /* -v a=1 to be done NOW. one -v for each */ case 'v': /* -v a=1 to be done NOW. one -v for each */
@ -120,11 +125,11 @@ int main(int argc, char *argv[])
dbg = atoi(&argv[1][2]); dbg = atoi(&argv[1][2]);
if (dbg == 0) if (dbg == 0)
dbg = 1; dbg = 1;
printf("awk %s\n", version); print("awk %s\n", version);
break; break;
case 'V': /* added for exptools "standard" */ case 'V': /* added for exptools "standard" */
printf("awk %s\n", version); print("awk %s\n", version);
exit(0); exits(0);
break; break;
default: default:
WARNING("unknown option %s ignored", argv[1]); WARNING("unknown option %s ignored", argv[1]);
@ -137,10 +142,10 @@ int main(int argc, char *argv[])
if (npfile == 0) { /* no -f; first argument is program */ if (npfile == 0) { /* no -f; first argument is program */
if (argc <= 1) { if (argc <= 1) {
if (dbg) if (dbg)
exit(0); exits(0);
FATAL("no program given"); FATAL("no program given");
} }
dprintf( ("program = |%s|\n", argv[1]) ); dprint( ("program = |%s|\n", argv[1]) );
lexprog = argv[1]; lexprog = argv[1];
argc--; argc--;
argv++; argv++;
@ -149,20 +154,20 @@ int main(int argc, char *argv[])
syminit(); syminit();
compile_time = 1; compile_time = 1;
argv[0] = cmdname; /* put prog name at front of arglist */ argv[0] = cmdname; /* put prog name at front of arglist */
dprintf( ("argc=%d, argv[0]=%s\n", argc, argv[0]) ); dprint( ("argc=%d, argv[0]=%s\n", argc, argv[0]) );
arginit(argc, argv); arginit(argc, argv);
if (!safe)
envinit(environ);
yyparse(); yyparse();
if (fs) if (fs)
*FS = qstring(fs, '\0'); *FS = qstring(fs, '\0');
dprintf( ("errorflag=%d\n", errorflag) ); dprint( ("errorflag=%d\n", errorflag) );
if (errorflag == 0) { if (errorflag == 0) {
compile_time = 0; compile_time = 0;
run(winner); run(winner);
} else } else
bracecheck(); bracecheck();
return(errorflag); if(errorflag)
exits("error");
exits(0);
} }
int pgetc(void) /* get 1 character from awk program */ int pgetc(void) /* get 1 character from awk program */
@ -170,20 +175,20 @@ int pgetc(void) /* get 1 character from awk program */
int c; int c;
for (;;) { for (;;) {
if (yyin == NULL) { if (yyin == nil) {
if (curpfile >= npfile) if (curpfile >= npfile)
return EOF; return EOF;
if (strcmp(pfile[curpfile], "-") == 0) if (strcmp(pfile[curpfile], "-") == 0)
yyin = stdin; yyin = &stdin;
else if ((yyin = fopen(pfile[curpfile], "r")) == NULL) else if ((yyin = Bopen(pfile[curpfile], OREAD)) == nil)
FATAL("can't open file %s", pfile[curpfile]); FATAL("can't open file %s", pfile[curpfile]);
lineno = 1; lineno = 1;
} }
if ((c = getc(yyin)) != EOF) if ((c = Bgetc(yyin)) != EOF)
return c; return c;
if (yyin != stdin) if (yyin != &stdin)
fclose(yyin); Bterm(yyin);
yyin = NULL; yyin = nil;
curpfile++; curpfile++;
} }
} }
@ -193,5 +198,5 @@ char *cursource(void) /* current source file name */
if (npfile > 0) if (npfile > 0)
return pfile[curpfile]; return pfile[curpfile];
else else
return NULL; return nil;
} }

View file

@ -28,9 +28,9 @@ THIS SOFTWARE.
* it finds the indices in y.tab.h, produced by yacc. * it finds the indices in y.tab.h, produced by yacc.
*/ */
#include <stdio.h> #include <u.h>
#include <string.h> #include <libc.h>
#include <stdlib.h> #include <bio.h>
#include "awk.h" #include "awk.h"
#include "y.tab.h" #include "y.tab.h"
@ -39,7 +39,7 @@ struct xx
char *name; char *name;
char *pname; char *pname;
} proc[] = { } proc[] = {
{ PROGRAM, "program", NULL }, { PROGRAM, "program", nil },
{ BOR, "boolop", " || " }, { BOR, "boolop", " || " },
{ AND, "boolop", " && " }, { AND, "boolop", " && " },
{ NOT, "boolop", " !" }, { NOT, "boolop", " !" },
@ -49,13 +49,13 @@ struct xx
{ LT, "relop", " < " }, { LT, "relop", " < " },
{ GE, "relop", " >= " }, { GE, "relop", " >= " },
{ GT, "relop", " > " }, { GT, "relop", " > " },
{ ARRAY, "array", NULL }, { ARRAY, "array", nil },
{ INDIRECT, "indirect", "$(" }, { INDIRECT, "indirect", "$(" },
{ SUBSTR, "substr", "substr" }, { SUBSTR, "substr", "substr" },
{ SUB, "sub", "sub" }, { SUB, "sub", "sub" },
{ GSUB, "gsub", "gsub" }, { GSUB, "gsub", "gsub" },
{ INDEX, "sindex", "sindex" }, { INDEX, "sindex", "sindex" },
{ SPRINTF, "awksprintf", "sprintf " }, { SPRINTF, "awksprintf", "sprintf" },
{ ADD, "arith", " + " }, { ADD, "arith", " + " },
{ MINUS, "arith", " - " }, { MINUS, "arith", " - " },
{ MULT, "arith", " * " }, { MULT, "arith", " * " },
@ -68,8 +68,8 @@ struct xx
{ PREDECR, "incrdecr", "--" }, { PREDECR, "incrdecr", "--" },
{ POSTDECR, "incrdecr", "--" }, { POSTDECR, "incrdecr", "--" },
{ CAT, "cat", " " }, { CAT, "cat", " " },
{ PASTAT, "pastat", NULL }, { PASTAT, "pastat", nil },
{ PASTAT2, "dopa2", NULL }, { PASTAT2, "dopa2", nil },
{ MATCH, "matchop", " ~ " }, { MATCH, "matchop", " ~ " },
{ NOTMATCH, "matchop", " !~ " }, { NOTMATCH, "matchop", " !~ " },
{ MATCHFCN, "matchop", "matchop" }, { MATCHFCN, "matchop", "matchop" },
@ -110,59 +110,62 @@ struct xx
char *table[SIZE]; char *table[SIZE];
char *names[SIZE]; char *names[SIZE];
int main(int argc, char *argv[]) void main(int, char**)
{ {
struct xx *p; struct xx *p;
int i, n, tok; int i, tok;
char c; Biobuf *fp;
FILE *fp; char *buf, *toks[3];
char buf[200], name[200], def[200];
printf("#include <stdio.h>\n"); print("#include <u.h>\n");
printf("#include \"awk.h\"\n"); print("#include <libc.h>\n");
printf("#include \"y.tab.h\"\n\n"); print("#include <bio.h>\n");
print("#include \"awk.h\"\n");
print("#include \"y.tab.h\"\n\n");
for (i = SIZE; --i >= 0; ) for (i = SIZE; --i >= 0; )
names[i] = ""; names[i] = "";
if ((fp = fopen("y.tab.h", "r")) == NULL) { if ((fp = Bopen("y.tab.h", OREAD)) == nil) {
fprintf(stderr, "maketab can't open y.tab.h!\n"); fprint(2, "maketab can't open y.tab.h!\n");
exit(1); exits("can't open y.tab.h");
} }
printf("static char *printname[%d] = {\n", SIZE); print("static char *printname[%d] = {\n", SIZE);
i = 0; i = 0;
while (fgets(buf, sizeof buf, fp) != NULL) { while ((buf = Brdline(fp, '\n')) != nil) {
n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok); buf[Blinelen(fp)-1] = '\0';
if (c != '#' || (n != 4 && strcmp(def,"define") != 0)) /* not a valid #define */ tokenize(buf, toks, 3);
if (toks[0] == nil || strcmp("#define", toks[0]) != 0) /* not a valid #define */
continue; continue;
tok = strtol(toks[2], nil, 10);
if (tok < FIRSTTOKEN || tok > LASTTOKEN) { if (tok < FIRSTTOKEN || tok > LASTTOKEN) {
fprintf(stderr, "maketab funny token %d %s\n", tok, buf); fprint(2, "maketab funny token %d %s\n", tok, buf);
exit(1); exits("funny token");
} }
names[tok-FIRSTTOKEN] = (char *) malloc(strlen(name)+1); names[tok-FIRSTTOKEN] = (char *) malloc(strlen(toks[1])+1);
strcpy(names[tok-FIRSTTOKEN], name); strcpy(names[tok-FIRSTTOKEN], toks[1]);
printf("\t(char *) \"%s\",\t/* %d */\n", name, tok); print("\t(char *) \"%s\",\t/* %d */\n", toks[1], tok);
i++; i++;
} }
printf("};\n\n"); print("};\n\n");
for (p=proc; p->token!=0; p++) for (p=proc; p->token!=0; p++)
table[p->token-FIRSTTOKEN] = p->name; table[p->token-FIRSTTOKEN] = p->name;
printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE); print("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
for (i=0; i<SIZE; i++) for (i=0; i<SIZE; i++)
if (table[i]==0) if (table[i]==0)
printf("\tnullproc,\t/* %s */\n", names[i]); print("\tnullproc,\t/* %s */\n", names[i]);
else else
printf("\t%s,\t/* %s */\n", table[i], names[i]); print("\t%s,\t/* %s */\n", table[i], names[i]);
printf("};\n\n"); print("};\n\n");
printf("char *tokname(int n)\n"); /* print a tokname() function */ print("char *tokname(int n)\n"); /* print a tokname() function */
printf("{\n"); print("{\n");
printf(" static char buf[100];\n\n"); print(" static char buf[100];\n\n");
printf(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n"); print(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n");
printf(" sprintf(buf, \"token %%d\", n);\n"); print(" sprint(buf, \"token %%d\", n);\n");
printf(" return buf;\n"); print(" return buf;\n");
printf(" }\n"); print(" }\n");
printf(" return printname[n-FIRSTTOKEN];\n"); print(" return printname[n-FIRSTTOKEN];\n");
printf("}\n"); print("}\n");
return 0; exits(0);
} }

View file

@ -6,6 +6,7 @@ OFILES=re.$O\
main.$O\ main.$O\
parse.$O\ parse.$O\
proctab.$O\ proctab.$O\
popen.$O\
tran.$O\ tran.$O\
lib.$O\ lib.$O\
run.$O\ run.$O\
@ -28,11 +29,6 @@ UPDATE=\
${TARG:%=/386/bin/%}\ ${TARG:%=/386/bin/%}\
</sys/src/cmd/mkone </sys/src/cmd/mkone
CFLAGS=-c -D_REGEXP_EXTENSION -D_RESEARCH_SOURCE -D_BSD_EXTENSION -DUTF
YFLAGS=-S -d -v
CC=pcc
LD=pcc
cpuobjtype=`{sed -n 's/^O=//p' /$cputype/mkfile}
y.tab.h awkgram.c: $YFILES y.tab.h awkgram.c: $YFILES
$YACC -o awkgram.c $YFLAGS $prereq $YACC -o awkgram.c $YFLAGS $prereq
@ -43,10 +39,10 @@ clean:V:
nuke:V: nuke:V:
rm -f *.[$OS] [$OS].out [$OS].maketab y.tab.? y.debug y.output awkgram.c proctab.c $TARG rm -f *.[$OS] [$OS].out [$OS].maketab y.tab.? y.debug y.output awkgram.c proctab.c $TARG
proctab.c: $cpuobjtype.maketab proctab.c: $O.maketab
./$cpuobjtype.maketab >proctab.c ./$O.maketab >proctab.c
$cpuobjtype.maketab: y.tab.h maketab.c $O.maketab: y.tab.h maketab.c
objtype=$cputype objtype=$cputype
mk maketab.$cputype mk maketab.$cputype

View file

@ -22,10 +22,9 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE. THIS SOFTWARE.
****************************************************************/ ****************************************************************/
#define DEBUG #include <u.h>
#include <stdio.h> #include <libc.h>
#include <string.h> #include <bio.h>
#include <stdlib.h>
#include "awk.h" #include "awk.h"
#include "y.tab.h" #include "y.tab.h"
@ -34,9 +33,9 @@ Node *nodealloc(int n)
Node *x; Node *x;
x = (Node *) malloc(sizeof(Node) + (n-1)*sizeof(Node *)); x = (Node *) malloc(sizeof(Node) + (n-1)*sizeof(Node *));
if (x == NULL) if (x == nil)
FATAL("out of space in nodealloc"); FATAL("out of space in nodealloc");
x->nnext = NULL; x->nnext = nil;
x->lineno = lineno; x->lineno = lineno;
return(x); return(x);
} }
@ -220,11 +219,11 @@ Node *linkum(Node *a, Node *b)
if (errorflag) /* don't link things that are wrong */ if (errorflag) /* don't link things that are wrong */
return a; return a;
if (a == NULL) if (a == nil)
return(b); return(b);
else if (b == NULL) else if (b == nil)
return(a); return(a);
for (c = a; c->nnext != NULL; c = c->nnext) for (c = a; c->nnext != nil; c = c->nnext)
; ;
c->nnext = b; c->nnext = b;
return(a); return(a);
@ -245,7 +244,7 @@ void defn(Cell *v, Node *vl, Node *st) /* turn on FCN bit in definition, */
for (p = vl; p; p = p->nnext) for (p = vl; p; p = p->nnext)
n++; n++;
v->fval = n; v->fval = n;
dprintf( ("defining func %s (%d args)\n", v->nval, n) ); dprint( ("defining func %s (%d args)\n", v->nval, n) );
} }
int isarg(char *s) /* is s in argument list for current function? */ int isarg(char *s) /* is s in argument list for current function? */
@ -262,7 +261,7 @@ int isarg(char *s) /* is s in argument list for current function? */
int ptoi(void *p) /* convert pointer to integer */ int ptoi(void *p) /* convert pointer to integer */
{ {
return (int) (long) p; /* swearing that p fits, of course */ return (int) (vlong) p; /* swearing that p fits, of course */
} }
Node *itonp(int i) /* and vice versa */ Node *itonp(int i) /* and vice versa */

View file

@ -44,7 +44,6 @@ extern void quoted(char **, char **, char *);
extern int match(void *, char *, char *); extern int match(void *, char *, char *);
extern int pmatch(void *, char *, char *); extern int pmatch(void *, char *, char *);
extern int nematch(void *, char *, char *); extern int nematch(void *, char *, char *);
extern int countposn(char *, int);
extern void overflow(void); extern void overflow(void);
extern int pgetc(void); extern int pgetc(void);
@ -100,7 +99,7 @@ extern void makefields(int, int);
extern void growfldtab(int n); extern void growfldtab(int n);
extern int getrec(char **, int *, int); extern int getrec(char **, int *, int);
extern void nextfile(void); extern void nextfile(void);
extern int readrec(char **buf, int *bufsize, FILE *inf); extern int readrec(char **buf, int *bufsize, Biobuf *inf);
extern char *getargv(int); extern char *getargv(int);
extern void setclvar(char *); extern void setclvar(char *);
extern void fldbld(void); extern void fldbld(void);
@ -110,7 +109,7 @@ extern int refldbld(char *, char *);
extern void recbld(void); extern void recbld(void);
extern Cell *fieldadr(int); extern Cell *fieldadr(int);
extern void yyerror(char *); extern void yyerror(char *);
extern void fpecatch(int); extern int handler(void*, char*);
extern void bracecheck(void); extern void bracecheck(void);
extern void bcheck2(int, int, int); extern void bcheck2(int, int, int);
extern void SYNTAX(char *, ...); extern void SYNTAX(char *, ...);
@ -165,13 +164,13 @@ extern Cell *instat(Node **, int);
extern Cell *bltin(Node **, int); extern Cell *bltin(Node **, int);
extern Cell *printstat(Node **, int); extern Cell *printstat(Node **, int);
extern Cell *nullproc(Node **, int); extern Cell *nullproc(Node **, int);
extern FILE *redirect(int, Node *); extern Biobuf *redirect(int, Node *);
extern FILE *openfile(int, char *); extern Biobuf *openfile(int, char *);
extern char *filename(FILE *); extern char *filename(Biobuf *);
extern Cell *closefile(Node **, int); extern Cell *closefile(Node **, int);
extern void closeall(void); extern void closeall(void);
extern Cell *sub(Node **, int); extern Cell *sub(Node **, int);
extern Cell *gsub(Node **, int); extern Cell *gsub(Node **, int);
extern FILE *popen(const char *, const char *); extern Biobuf *popen(char *, int);
extern int pclose(FILE *); extern int pclose(Biobuf *);

View file

@ -22,18 +22,13 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE. THIS SOFTWARE.
****************************************************************/ ****************************************************************/
#include <u.h>
#define DEBUG #include <libc.h>
#include <stdio.h>
#include <ctype.h> #include <ctype.h>
#include <setjmp.h> #include <bio.h>
#include <math.h> #include <regexp.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include "awk.h" #include "awk.h"
#include "y.tab.h" #include "y.tab.h"
#include "regexp.h"
/* This file provides the interface between the main body of /* This file provides the interface between the main body of
* awk and the pattern matching package. It preprocesses * awk and the pattern matching package. It preprocesses
@ -198,11 +193,11 @@ pmatch(void *p, char *s, char *start)
{ {
Resub m; Resub m;
m.s.sp = start; m.sp = start;
m.e.ep = 0; m.ep = 0;
if (regexec((Reprog *) p, (char *) s, &m, 1)) { if (regexec((Reprog *) p, (char *) s, &m, 1)) {
patbeg = m.s.sp; patbeg = m.sp;
patlen = m.e.ep-m.s.sp; patlen = m.ep-m.sp;
return 1; return 1;
} }
patlen = -1; patlen = -1;
@ -250,7 +245,7 @@ quoted(char **s, char **to, char *end) /* handle escaped sequence */
{ {
char *p = *s; char *p = *s;
char *t = *to; char *t = *to;
wchar_t c; Rune c;
switch(c = *p++) { switch(c = *p++) {
case 't': case 't':
@ -273,8 +268,8 @@ quoted(char **s, char **to, char *end) /* handle escaped sequence */
*t++ = '\\'; *t++ = '\\';
if (c == 'x') { /* hexadecimal goo follows */ if (c == 'x') { /* hexadecimal goo follows */
c = hexstr(&p); c = hexstr(&p);
if (t < end-MB_CUR_MAX) if (t < end-UTFmax)
t += wctomb(t, c); t += runelen(c);
else overflow(); else overflow();
*to = t; *to = t;
*s = p; *s = p;
@ -294,21 +289,6 @@ quoted(char **s, char **to, char *end) /* handle escaped sequence */
*s = p; *s = p;
*to = t; *to = t;
} }
/* count rune positions */
int
countposn(char *s, int n)
{
int i, j;
char *end;
for (i = 0, end = s+n; *s && s < end; i++){
j = mblen(s, n);
if(j <= 0)
j = 1;
s += j;
}
return(i);
}
/* pattern package error handler */ /* pattern package error handler */

File diff suppressed because it is too large Load diff

View file

@ -22,12 +22,10 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE. THIS SOFTWARE.
****************************************************************/ ****************************************************************/
#define DEBUG #include <u.h>
#include <stdio.h> #include <libc.h>
#include <math.h>
#include <ctype.h> #include <ctype.h>
#include <string.h> #include <bio.h>
#include <stdlib.h>
#include "awk.h" #include "awk.h"
#include "y.tab.h" #include "y.tab.h"
@ -46,7 +44,7 @@ Awkfloat *NF; /* number of fields in current record */
Awkfloat *NR; /* number of current record */ Awkfloat *NR; /* number of current record */
Awkfloat *FNR; /* number of current record in current file */ Awkfloat *FNR; /* number of current record in current file */
char **FILENAME; /* current filename argument */ char **FILENAME; /* current filename argument */
Awkfloat *ARGC; /* number of arguments from command line */ Awkfloat *AARGC; /* number of arguments from command line */
char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */ char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */ Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
Awkfloat *RLENGTH; /* length of same */ Awkfloat *RLENGTH; /* length of same */
@ -101,12 +99,12 @@ void arginit(int ac, char **av) /* set up ARGV and ARGC */
int i; int i;
char temp[50]; char temp[50];
ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval; AARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
cp = setsymtab("ARGV", "", 0.0, ARR, symtab); cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */ ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
cp->sval = (char *) ARGVtab; cp->sval = (char *) ARGVtab;
for (i = 0; i < ac; i++) { for (i = 0; i < ac; i++) {
sprintf(temp, "%d", i); sprint(temp, "%d", i);
if (is_number(*av)) if (is_number(*av))
setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab); setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
else else
@ -124,7 +122,7 @@ void envinit(char **envp) /* set up ENVIRON variable */
ENVtab = makesymtab(NSYMTAB); ENVtab = makesymtab(NSYMTAB);
cp->sval = (char *) ENVtab; cp->sval = (char *) ENVtab;
for ( ; *envp; envp++) { for ( ; *envp; envp++) {
if ((p = strchr(*envp, '=')) == NULL) if ((p = strchr(*envp, '=')) == nil)
continue; continue;
*p++ = 0; /* split into two strings at = */ *p++ = 0; /* split into two strings at = */
if (is_number(p)) if (is_number(p))
@ -142,9 +140,9 @@ Array *makesymtab(int n) /* make a new symbol table */
ap = (Array *) malloc(sizeof(Array)); ap = (Array *) malloc(sizeof(Array));
tp = (Cell **) calloc(n, sizeof(Cell *)); tp = (Cell **) calloc(n, sizeof(Cell *));
if (ap == NULL || tp == NULL) if (ap == nil || tp == nil)
FATAL("out of space in makesymtab"); FATAL("out of space in makesymtab");
ap->nelem = 0; ap->nelemt = 0;
ap->size = n; ap->size = n;
ap->tab = tp; ap->tab = tp;
return(ap); return(ap);
@ -159,10 +157,10 @@ void freesymtab(Cell *ap) /* free a symbol table */
if (!isarr(ap)) if (!isarr(ap))
return; return;
tp = (Array *) ap->sval; tp = (Array *) ap->sval;
if (tp == NULL) if (tp == nil)
return; return;
for (i = 0; i < tp->size; i++) { for (i = 0; i < tp->size; i++) {
for (cp = tp->tab[i]; cp != NULL; cp = temp) { for (cp = tp->tab[i]; cp != nil; cp = temp) {
xfree(cp->nval); xfree(cp->nval);
if (freeable(cp)) if (freeable(cp))
xfree(cp->sval); xfree(cp->sval);
@ -178,14 +176,14 @@ void freesymtab(Cell *ap) /* free a symbol table */
void freeelem(Cell *ap, char *s) /* free elem s from ap (i.e., ap["s"] */ void freeelem(Cell *ap, char *s) /* free elem s from ap (i.e., ap["s"] */
{ {
Array *tp; Array *tp;
Cell *p, *prev = NULL; Cell *p, *prev = nil;
int h; int h;
tp = (Array *) ap->sval; tp = (Array *) ap->sval;
h = hash(s, tp->size); h = hash(s, tp->size);
for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext) for (p = tp->tab[h]; p != nil; prev = p, p = p->cnext)
if (strcmp(s, p->nval) == 0) { if (strcmp(s, p->nval) == 0) {
if (prev == NULL) /* 1st one */ if (prev == nil) /* 1st one */
tp->tab[h] = p->cnext; tp->tab[h] = p->cnext;
else /* middle somewhere */ else /* middle somewhere */
prev->cnext = p->cnext; prev->cnext = p->cnext;
@ -193,7 +191,7 @@ void freeelem(Cell *ap, char *s) /* free elem s from ap (i.e., ap["s"] */
xfree(p->sval); xfree(p->sval);
free(p->nval); free(p->nval);
free(p); free(p);
tp->nelem--; tp->nelemt--;
return; return;
} }
} }
@ -203,13 +201,13 @@ Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
int h; int h;
Cell *p; Cell *p;
if (n != NULL && (p = lookup(n, tp)) != NULL) { if (n != nil && (p = lookup(n, tp)) != nil) {
dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n", dprint( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
p, p->nval, p->sval, p->fval, p->tval) ); p, p->nval, p->sval, p->fval, p->tval) );
return(p); return(p);
} }
p = (Cell *) malloc(sizeof(Cell)); p = (Cell *) malloc(sizeof(Cell));
if (p == NULL) if (p == nil)
FATAL("out of space for symbol table at %s", n); FATAL("out of space for symbol table at %s", n);
p->nval = tostring(n); p->nval = tostring(n);
p->sval = s ? tostring(s) : tostring(""); p->sval = s ? tostring(s) : tostring("");
@ -217,13 +215,13 @@ Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
p->tval = t; p->tval = t;
p->csub = CUNK; p->csub = CUNK;
p->ctype = OCELL; p->ctype = OCELL;
tp->nelem++; tp->nelemt++;
if (tp->nelem > FULLTAB * tp->size) if (tp->nelemt > FULLTAB * tp->size)
rehash(tp); rehash(tp);
h = hash(n, tp->size); h = hash(n, tp->size);
p->cnext = tp->tab[h]; p->cnext = tp->tab[h];
tp->tab[h] = p; tp->tab[h] = p;
dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n", dprint( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
p, p->nval, p->sval, p->fval, p->tval) ); p, p->nval, p->sval, p->fval, p->tval) );
return(p); return(p);
} }
@ -244,7 +242,7 @@ void rehash(Array *tp) /* rehash items in small table into big one */
nsz = GROWTAB * tp->size; nsz = GROWTAB * tp->size;
np = (Cell **) calloc(nsz, sizeof(Cell *)); np = (Cell **) calloc(nsz, sizeof(Cell *));
if (np == NULL) /* can't do it, but can keep running. */ if (np == nil) /* can't do it, but can keep running. */
return; /* someone else will run out later. */ return; /* someone else will run out later. */
for (i = 0; i < tp->size; i++) { for (i = 0; i < tp->size; i++) {
for (cp = tp->tab[i]; cp; cp = op) { for (cp = tp->tab[i]; cp; cp = op) {
@ -265,10 +263,10 @@ Cell *lookup(char *s, Array *tp) /* look for s in tp */
int h; int h;
h = hash(s, tp->size); h = hash(s, tp->size);
for (p = tp->tab[h]; p != NULL; p = p->cnext) for (p = tp->tab[h]; p != nil; p = p->cnext)
if (strcmp(s, p->nval) == 0) if (strcmp(s, p->nval) == 0)
return(p); /* found it */ return(p); /* found it */
return(NULL); /* not found */ return(nil); /* not found */
} }
Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
@ -282,7 +280,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
fldno = atoi(vp->nval); fldno = atoi(vp->nval);
if (fldno > *NF) if (fldno > *NF)
newfld(fldno); newfld(fldno);
dprintf( ("setting field %d to %g\n", fldno, f) ); dprint( ("setting field %d to %g\n", fldno, f) );
} else if (isrec(vp)) { } else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */ donefld = 0; /* mark $1... invalid */
donerec = 1; donerec = 1;
@ -291,7 +289,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
xfree(vp->sval); /* free any previous string */ xfree(vp->sval); /* free any previous string */
vp->tval &= ~STR; /* mark string invalid */ vp->tval &= ~STR; /* mark string invalid */
vp->tval |= NUM; /* mark number ok */ vp->tval |= NUM; /* mark number ok */
dprintf( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) ); dprint( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
return vp->fval = f; return vp->fval = f;
} }
@ -310,7 +308,7 @@ char *setsval(Cell *vp, char *s) /* set string val of a Cell */
char *t; char *t;
int fldno; int fldno;
dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) ); dprint( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
if ((vp->tval & (NUM | STR)) == 0) if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "assign to"); funnyvar(vp, "assign to");
if (isfld(vp)) { if (isfld(vp)) {
@ -318,7 +316,7 @@ char *setsval(Cell *vp, char *s) /* set string val of a Cell */
fldno = atoi(vp->nval); fldno = atoi(vp->nval);
if (fldno > *NF) if (fldno > *NF)
newfld(fldno); newfld(fldno);
dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) ); dprint( ("setting field %d to %s (%p)\n", fldno, s, s) );
} else if (isrec(vp)) { } else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */ donefld = 0; /* mark $1... invalid */
donerec = 1; donerec = 1;
@ -329,7 +327,7 @@ char *setsval(Cell *vp, char *s) /* set string val of a Cell */
if (freeable(vp)) if (freeable(vp))
xfree(vp->sval); xfree(vp->sval);
vp->tval &= ~DONTFREE; vp->tval &= ~DONTFREE;
dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) ); dprint( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
return(vp->sval = t); return(vp->sval = t);
} }
@ -346,7 +344,7 @@ Awkfloat getfval(Cell *vp) /* get float val of a Cell */
if (is_number(vp->sval) && !(vp->tval&CON)) if (is_number(vp->sval) && !(vp->tval&CON))
vp->tval |= NUM; /* make NUM only sparingly */ vp->tval |= NUM; /* make NUM only sparingly */
} }
dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) ); dprint( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
return(vp->fval); return(vp->fval);
} }
@ -365,14 +363,14 @@ char *getsval(Cell *vp) /* get string val of a Cell */
if (freeable(vp)) if (freeable(vp))
xfree(vp->sval); xfree(vp->sval);
if (modf(vp->fval, &dtemp) == 0) /* it's integral */ if (modf(vp->fval, &dtemp) == 0) /* it's integral */
sprintf(s, "%.30g", vp->fval); sprint(s, "%.30g", vp->fval);
else else
sprintf(s, *CONVFMT, vp->fval); sprint(s, *CONVFMT, vp->fval);
vp->sval = tostring(s); vp->sval = tostring(s);
vp->tval &= ~DONTFREE; vp->tval &= ~DONTFREE;
vp->tval |= STR; vp->tval |= STR;
} }
dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) ); dprint( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
return(vp->sval); return(vp->sval);
} }
@ -381,7 +379,7 @@ char *tostring(char *s) /* make a copy of string s */
char *p; char *p;
p = (char *) malloc(strlen(s)+1); p = (char *) malloc(strlen(s)+1);
if (p == NULL) if (p == nil)
FATAL("out of space in tostring on %s", s); FATAL("out of space in tostring on %s", s);
strcpy(p, s); strcpy(p, s);
return(p); return(p);
@ -393,7 +391,7 @@ char *qstring(char *s, int delim) /* collect string up to next delim */
int c, n; int c, n;
char *buf, *bp; char *buf, *bp;
if ((buf = (char *) malloc(strlen(s)+3)) == NULL) if ((buf = (char *) malloc(strlen(s)+3)) == nil)
FATAL( "out of space in qstring(%s)", s); FATAL( "out of space in qstring(%s)", s);
for (bp = buf; (c = *s) != delim; s++) { for (bp = buf; (c = *s) != delim; s++) {
if (c == '\n') if (c == '\n')
@ -429,6 +427,6 @@ char *qstring(char *s, int delim) /* collect string up to next delim */
} }
} }
} }
*bp++ = 0; *bp = 0;
return buf; return buf;
} }

View file

@ -1,7 +1,7 @@
#include <u.h> #include <u.h>
#include <libc.h> #include <libc.h>
#include <bio.h> #include <bio.h>
#include <regexp.h> #include "regexp.h"
#include "hash.h" #include "hash.h"
Hash hash; Hash hash;

View file

@ -1,7 +1,7 @@
#include <u.h> #include <u.h>
#include <libc.h> #include <libc.h>
#include <bio.h> #include <bio.h>
#include <regexp.h> #include "regexp.h"
#include "hash.h" #include "hash.h"
enum enum

View file

@ -2,8 +2,8 @@
#include <libc.h> #include <libc.h>
#include <bin.h> #include <bin.h>
#include <bio.h> #include <bio.h>
#include <regexp.h> #include "regexp.h"
#include "/sys/src/libregexp/regcomp.h" #include "regcomp.h"
#include "dfa.h" #include "dfa.h"
void rdump(Reprog*); void rdump(Reprog*);

View file

@ -1,7 +1,7 @@
#include <u.h> #include <u.h>
#include <libc.h> #include <libc.h>
#include <bio.h> #include <bio.h>
#include <regexp.h> #include "regexp.h"
#include "/sys/src/libregexp/regcomp.h" #include "/sys/src/libregexp/regcomp.h"
#include "dfa.h" #include "dfa.h"

View file

@ -7,7 +7,7 @@
#include <u.h> #include <u.h>
#include <libc.h> #include <libc.h>
#include <bio.h> #include <bio.h>
#include <regexp.h> #include "regexp.h"
#include <ctype.h> #include <ctype.h>
#include "dfa.h" #include "dfa.h"

View file

@ -4,7 +4,7 @@
#include <u.h> #include <u.h>
#include <libc.h> #include <libc.h>
#include "regexp.h" #include "regexp.h"
#include "/sys/src/libregexp/regcomp.h" #include "regcomp.h"
#define TRUE 1 #define TRUE 1
#define FALSE 0 #define FALSE 0

View file

@ -1,7 +1,7 @@
#include <u.h> #include <u.h>
#include <libc.h> #include <libc.h>
#include <bio.h> #include <bio.h>
#include <regexp.h> #include "regexp.h"
#include "dfa.h" #include "dfa.h"
/*** /***

View file

@ -6,12 +6,12 @@ OFILES=\
regerror.$O\ regerror.$O\
regexec.$O\ regexec.$O\
regsub.$O\ regsub.$O\
regaux.$O\
rregexec.$O\ rregexec.$O\
rregsub.$O\ rregsub.$O\
regprint.$O\
HFILES=/sys/include/regexp.h\ HFILES=/sys/include/regexp.h\
regcomp.h\ regimpl.h\
UPDATE=\ UPDATE=\
mkfile\ mkfile\
@ -21,8 +21,8 @@ UPDATE=\
</sys/src/cmd/mksyslib </sys/src/cmd/mksyslib
test: test.$O $OFILES $O.regextest: tests/regextest.$O $LIB
$LD -o test $prereq $LD -o $target regextest.$O
test2: test2.$O $OFILES $O.sysregextest: tests/sysregextest.$O
$LD -o test2 $prereq $LD -o $target sysregextest.$O

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
#include <u.h> #include <u.h>
#include <libc.h> #include <libc.h>
#include "regexp.h" #include <regexp.h>
void void
regerror(char *s) regerror(char *s)

View file

@ -1,232 +1,190 @@
#include <u.h> #include <u.h>
#include <libc.h> #include <libc.h>
#include "regexp.h" #include <regexp.h>
#include "regcomp.h" #include "regimpl.h"
typedef struct RethreadQ RethreadQ;
/* struct RethreadQ
* return 0 if no match
* >0 if a match
* <0 if we ran out of _relist space
*/
static int
regexec1(Reprog *progp, /* program to run */
char *bol, /* string to run machine on */
Resub *mp, /* subexpression elements */
int ms, /* number of elements at mp */
Reljunk *j
)
{ {
int flag=0; Rethread *head;
Reinst *inst; Rethread **tail;
Relist *tlp; };
char *s;
int i, checkstart;
Rune r, *rp, *ep;
int n;
Relist* tl; /* This list, next list */
Relist* nl;
Relist* tle; /* ends of this and next list */
Relist* nle;
int match;
char *p;
match = 0; int
checkstart = j->starttype; regexec(Reprog *prog, char *str, Resub *sem, int msize)
if(mp) {
for(i=0; i<ms; i++) { RethreadQ lists[2], *clist, *nlist, *tmp;
mp[i].sp = 0; Rethread *t, *nextthr, **availthr;
mp[i].ep = 0; Reinst *curinst;
Rune r;
char *sp, *ep, endc;
int i, match, first, gen, matchpri, pri;
if(msize > NSUBEXPM)
msize = NSUBEXPM;
if(prog->startinst->gen != 0) {
for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++)
curinst->gen = 0;
}
clist = lists;
clist->head = nil;
clist->tail = &clist->head;
nlist = lists + 1;
nlist->head = nil;
nlist->tail = &nlist->head;
for(i = 0; i < prog->nthr; i++)
prog->thrpool[i] = prog->threads + i;
availthr = prog->thrpool + prog->nthr;
pri = matchpri = gen = match = 0;
sp = str;
ep = nil;
endc = '\0';
if(sem != nil && msize > 0) {
if(sem->sp != nil)
sp = sem->sp;
if(sem->ep != nil && *sem->ep != '\0') {
ep = sem->ep;
endc = *sem->ep;
*sem->ep = '\0';
} }
j->relist[0][0].inst = 0; }
j->relist[1][0].inst = 0; r = Runemax + 1;
for(; r != L'\0'; sp += i) {
/* Execute machine once for each character, including terminal NUL */ gen++;
s = j->starts; i = chartorune(&r, sp);
do{ first = 1;
/* fast check for first char */ t = clist->head;
if(checkstart) { if(t == nil)
switch(j->starttype) { goto Start;
case RUNE: curinst = t->pc;
p = utfrune(s, j->startchar); Again:
if(p == 0 || s == j->eol) if(curinst->gen == gen)
return match; goto Done;
s = p; curinst->gen = gen;
break; switch(curinst->op) {
case BOL: case ORUNE:
if(s == bol) if(r != curinst->r)
break; goto Done;
p = utfrune(s, '\n'); case OANY: /* fallthrough */
if(p == 0 || s == j->eol) Any:
return match; nextthr = t->next;
s = p+1; t->pc = curinst + 1;
t->next = nil;
*nlist->tail = t;
nlist->tail = &t->next;
if(nextthr == nil)
break; break;
t = nextthr;
curinst = t->pc;
goto Again;
case OCLASS:
Class:
if(r < curinst->r)
goto Done;
if(r > curinst->r1) {
curinst++;
goto Class;
} }
} nextthr = t->next;
r = *(uchar*)s; t->pc = curinst->a;
if(r < Runeself) t->next = nil;
n = 1; *nlist->tail = t;
else nlist->tail = &t->next;
n = chartorune(&r, s); if(nextthr == nil)
break;
/* switch run lists */ t = nextthr;
tl = j->relist[flag]; curinst = t->pc;
tle = j->reliste[flag]; goto Again;
nl = j->relist[flag^=1]; case ONOTNL:
nle = j->reliste[flag]; if(r != L'\n') {
nl->inst = 0; curinst++;
goto Again;
/* Add first instruction to current list */ }
if(match == 0) goto Done;
_renewemptythread(tl, progp->startinst, ms, s); case OBOL:
if(sp == str || sp[-1] == '\n') {
/* Execute machine until current list is empty */ curinst++;
for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ goto Again;
for(inst = tlp->inst; ; inst = inst->next){ }
switch(inst->type){ goto Done;
case RUNE: /* regular character */ case OEOL:
if(inst->r == r){ if(r == L'\0' && ep == nil) {
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) curinst++;
return -1; goto Again;
} }
break; if(r == L'\n')
case LBRA: goto Any;
tlp->se.m[inst->subid].sp = s; goto Done;
continue; case OJMP:
case RBRA: curinst = curinst->a;
tlp->se.m[inst->subid].ep = s; goto Again;
continue; case OSPLIT:
case ANY: nextthr = *--availthr;
if(r != '\n') nextthr->pc = curinst->b;
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) if(msize > 0)
return -1; memcpy(nextthr->sem, t->sem, sizeof(Resub)*msize);
break; nextthr->pri = t->pri;
case ANYNL: nextthr->next = t->next;
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) t->next = nextthr;
return -1; curinst = curinst->a;
break; goto Again;
case BOL: case OSAVE:
if(s == bol || *(s-1) == '\n') if(curinst->sub < msize)
continue; t->sem[curinst->sub].sp = sp;
break; curinst++;
case EOL: goto Again;
if(s == j->eol || r == 0 || r == '\n') case OUNSAVE:
continue; if(curinst->sub == 0) {
break; /* "Highest" priority is the left-most longest. */
case CCLASS: if (t->pri > matchpri)
ep = inst->cp->end; goto Done;
for(rp = inst->cp->spans; rp < ep; rp += 2) match = 1;
if(r >= rp[0] && r <= rp[1]){ matchpri = t->pri;
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) if(sem != nil && msize > 0) {
return -1; memcpy(sem, t->sem, sizeof(Resub)*msize);
break; sem->ep = sp;
}
break;
case NCCLASS:
ep = inst->cp->end;
for(rp = inst->cp->spans; rp < ep; rp += 2)
if(r >= rp[0] && r <= rp[1])
break;
if(rp == ep)
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
return -1;
break;
case OR:
/* evaluate right choice later */
if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
return -1;
/* efficiency: advance and re-evaluate */
continue;
case END: /* Match! */
match = 1;
tlp->se.m[0].ep = s;
if(mp != 0)
_renewmatch(mp, ms, &tlp->se);
break;
} }
break; goto Done;
} }
if(curinst->sub < msize)
t->sem[curinst->sub].ep = sp;
curinst++;
goto Again;
Done:
*availthr++ = t;
t = t->next;
if(t == nil)
break;
curinst = t->pc;
goto Again;
} }
if(s == j->eol) Start:
/* Start again once if we haven't found anything. */
if(first == 1 && match == 0) {
first = 0;
t = *--availthr;
if(msize > 0)
memset(t->sem, 0, sizeof(Resub)*msize);
/* "Lower" priority thread */
t->pri = matchpri = pri++;
t->next = nil;
curinst = prog->startinst;
goto Again;
}
/* If we have a match and no extant threads, we are done. */
if(match == 1 && nlist->head == nil)
break; break;
checkstart = j->starttype && nl->inst==0; tmp = clist;
s += n; clist = nlist;
}while(r); nlist = tmp;
nlist->head = nil;
nlist->tail = &nlist->head;
}
if(ep != nil)
*ep = endc;
return match; return match;
} }
/*
 * Second-chance matcher: run regexec1 again with two heap-allocated
 * thread lists of BIGLISTSIZE entries each instead of the caller's lists.
 *
 * The list pointers and their end markers are stored into *j before the
 * run; the end markers sit two entries before the end of each allocation.
 *
 * Returns whatever regexec1 returns, or -1 if either list cannot be
 * allocated.  Both lists are released before returning.
 */
static int
regexec2(Reprog *progp,	/* program to run */
	char *bol,	/* string to run machine on */
	Resub *mp,	/* subexpression elements */
	int ms,		/* number of elements at mp */
	Reljunk *j
)
{
	int rv;
	Relist *relist0, *relist1;

	/* mark space */
	relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
	if(relist0 == nil)
		return -1;
	relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
	if(relist1 == nil){
		/* the first list is the one that exists; release it, not the nil pointer */
		free(relist0);
		return -1;
	}
	j->relist[0] = relist0;
	j->relist[1] = relist1;
	j->reliste[0] = relist0 + BIGLISTSIZE - 2;
	j->reliste[1] = relist1 + BIGLISTSIZE - 2;

	rv = regexec1(progp, bol, mp, ms, j);
	free(relist0);
	free(relist1);
	return rv;
}
/*
 * Match the compiled program progp against the string bol.
 *
 * If mp is non-nil and ms > 0, a non-nil mp->sp overrides the starting
 * position and a non-nil mp->ep sets the end position handed to the
 * matcher.  The type of the program's first instruction is recorded in
 * the Reljunk (a RUNE below Runeself, or BOL) before matching.
 *
 * The match is first attempted with two on-stack lists of LISTSIZE
 * entries; if regexec1 reports a negative result the attempt is repeated
 * through regexec2.  Returns the non-negative matcher result, or -1 if
 * both attempts fail.
 */
extern int
regexec(Reprog *progp,	/* program to run */
	char *bol,	/* string to run machine on */
	Resub *mp,	/* subexpression elements */
	int ms)		/* number of elements at mp */
{
	Relist relist0[LISTSIZE], relist1[LISTSIZE];
	Reljunk j;
	int rv;

	/* mark space */
	j.relist[0] = relist0;
	j.relist[1] = relist1;
	j.reliste[0] = relist0 + LISTSIZE - 2;
	j.reliste[1] = relist1 + LISTSIZE - 2;

	/* default bounds, optionally overridden by the caller through mp[0] */
	j.starts = bol;
	j.eol = 0;
	if(mp && ms>0){
		if(mp->sp)
			j.starts = mp->sp;
		if(mp->ep)
			j.eol = mp->ep;
	}

	/* note what the program begins with */
	j.starttype = 0;
	j.startchar = 0;
	if(progp->startinst->type == RUNE && progp->startinst->r < Runeself){
		j.starttype = RUNE;
		j.startchar = progp->startinst->r;
	}else if(progp->startinst->type == BOL)
		j.starttype = BOL;

	rv = regexec1(progp, bol, mp, ms, &j);
	if(rv < 0)
		rv = regexec2(progp, bol, mp, ms, &j);
	return rv >= 0 ? rv : -1;
}

104
sys/src/libregexp/regimpl.h Normal file
View file

@ -0,0 +1,104 @@
/* L-prefixed constants (presumably the lexer's token kinds -- confirm in regcomp) */
enum
{
	LANY = 0,
	LBOL,
	LCLASS,
	LEND,
	LEOL,
	LLPAR,
	LOR,
	LREP,
	LRPAR,
	LRUNE
};

/* T-prefixed constants (presumably Renode.op parse-tree node kinds -- confirm) */
enum
{
	TANY = 0,
	TBOL,
	TCAT,
	TCLASS,
	TEOL,
	TNOTNL,
	TOR,
	TPLUS,
	TQUES,
	TRUNE,
	TSTAR,
	TSUB
};

/* number of Resub slots held in each Rethread */
enum
{
	NSUBEXPM = 32
};
typedef struct Parselex Parselex;
typedef struct Renode Renode;
/*
 * State shared by the parser and the lexer; the fields are grouped
 * into a parse half and a lex half.
 * NOTE(review): the per-field notes below are inferred from names and
 * types only; the code that uses them is not in this header -- confirm.
 */
struct Parselex
{
/* Parse */
Renode *next;	/* presumably the next free slot in nodes -- confirm */
Renode *nodes;	/* presumably the Renode pool for the parse tree -- confirm */
int sub;	/* presumably the running subexpression number -- confirm */
int instrs;	/* presumably the count of instructions to emit -- confirm */
jmp_buf exitenv;	/* presumably the longjmp target used to abandon a parse -- confirm */
/* Lex */
void (*getnextr)(Parselex*);	/* rune fetcher; getnextr and getnextrlit share this signature */
char *rawexp;
char *orig;
Rune rune;
Rune peek;
int peeklex;
int done;
int literal;
Rune cpairs[400+2];	/* presumably the rune pairs of a character class being read -- confirm */
int nc;	/* presumably the number of entries used in cpairs -- confirm */
};
/*
 * Parse-tree node.
 * NOTE(review): the field roles below are inferred from names and types;
 * the code that builds and walks the tree is not in this header -- confirm.
 */
struct Renode
{
int op;	/* presumably one of the T* constants -- confirm */
Renode *left;
Rune r;
/* r1, sub and right share storage; which one is live presumably depends on op */
union
{
Rune r1;
int sub;
Renode *right;
};
int nclass;
};
/*
 * One matcher thread, as used by rregexec: a position in the program
 * plus the submatch positions recorded along the way.
 */
struct Rethread
{
Reinst *pc;	/* instruction at which the matcher resumes this thread */
Resub sem[NSUBEXPM];	/* submatch slots written by OSAVE/OUNSAVE, copied to a thread created at OSPLIT */
int pri;	/* priority; a finishing thread whose pri exceeds the current match priority is discarded */
Rethread *next;	/* link to the next thread in a thread queue */
};
/*
 * One instruction of a compiled program.  Which operands are
 * meaningful depends on op (see fmtprinst and the matcher).
 */
struct Reinst
{
char op;	/* opcode: one of the O* constants from regexp.h */
int gen;	/* generation stamp; the matcher skips an instruction already stamped with the current generation */
Reinst *a;	/* target for OJMP, first branch of OSPLIT, continuation after an OCLASS match */
union
{
Rune r;	/* ORUNE: rune to match; OCLASS: low end of the range */
int sub;	/* OSAVE/OUNSAVE: index of the submatch slot */
};
union
{
Rune r1;	/* OCLASS: high end of the range */
Reinst *b;	/* OSPLIT: second branch */
};
};
/*
 * Forward declarations of file-local helpers; their definitions are not
 * in this header.
 * NOTE(review): the grouping comments below are inferred from names and
 * signatures only -- confirm against the definitions.
 */
/* lexing: all operate on the Parselex state */
static int lex(Parselex*);
static void getnextr(Parselex*);
static void getnextrlit(Parselex*);
static void getclass(Parselex*);
/* parsing: each returns a Renode (presumably one function per precedence level -- confirm) */
static Renode *e0(Parselex*);
static Renode *e1(Parselex*);
static Renode *e2(Parselex*);
static Renode *e3(Parselex*);
static Renode *buildclass(Parselex*);
static Renode *buildclassn(Parselex*);
static int pcmp(void*, void*);
/* compilation: Renode tree to Reprog/Reinst */
static Reprog *regcomp1(char*, int, int);
static Reinst *compile(Renode*, Reprog*, int);
static Reinst *compile1(Renode*, Reinst*, int*, int);
/* presumably a debugging aid that prints a Renode tree -- confirm */
static void prtree(Renode*, int, int);

View file

@ -0,0 +1,66 @@
#include <u.h>
#include <libc.h>
#include <regexp.h>
#include <regimpl.h>
/*
 * Print one instruction to f: its address, then the opcode name and the
 * operands that opcode uses.  An opcode not listed here gets only the
 * address.  Returns the sum of the fmtprint results.
 */
static int
fmtprinst(Fmt *f, Reinst *inst)
{
	int n;

	n = fmtprint(f, "%p ", inst);
	switch(inst->op) {
	case OANY:
		return n + fmtprint(f, "OANY \t.\n");
	case OBOL:
		return n + fmtprint(f, "OBOL \t^\n");
	case OCLASS:
		return n + fmtprint(f, "OCLASS\t%C-%C %p\n", inst->r, inst->r1, inst->a);
	case OEOL:
		return n + fmtprint(f, "OEOL \t$\n");
	case OJMP:
		return n + fmtprint(f, "OJMP \t%p\n", inst->a);
	case ONOTNL:
		return n + fmtprint(f, "ONOTNL\n");
	case ORUNE:
		return n + fmtprint(f, "ORUNE\t%C\n", inst->r);
	case OSAVE:
		return n + fmtprint(f, "OSAVE\t%d\n", inst->sub);
	case OSPLIT:
		return n + fmtprint(f, "OSPLIT\t%p %p\n", inst->a, inst->b);
	case OUNSAVE:
		return n + fmtprint(f, "OUNSAVE\t%d\n", inst->sub);
	}
	return n;
}
/*
 * Print every instruction of reprog, from startinst through the len-th,
 * one per fmtprinst call.  Returns the sum of the fmtprinst results.
 */
static int
fmtprprog(Fmt *f, Reprog *reprog)
{
	Reinst *pc, *end;
	int total;

	total = 0;
	pc = reprog->startinst;
	end = pc + reprog->len;
	while(pc < end){
		total += fmtprinst(f, pc);
		pc++;
	}
	return total;
}
/*
 * Fmt routine for a compiled program: takes the next argument from
 * f->args as a Reprog* and prints its instruction listing.
 * Returns the result of fmtprprog.
 */
int
reprogfmt(Fmt *f)
{
	return fmtprprog(f, va_arg(f->args, Reprog*));
}

View file

@ -1,63 +1,66 @@
#include <u.h> #include <u.h>
#include <libc.h> #include <libc.h>
#include "regexp.h" #include <regexp.h>
/* substitute into one string using the matches from the last regexec() */ void
extern void regsub(char *src, char *dst, int dlen, Resub *match, int msize)
regsub(char *sp, /* source string */
char *dp, /* destination string */
int dlen,
Resub *mp, /* subexpression elements */
int ms) /* number of elements pointed to by mp */
{ {
char *ssp, *ep;
int i; int i;
char *ep, c;
ep = dp+dlen-1; ep = dst + dlen-1;
while(*sp != '\0'){ for(;*src != '\0'; src++) switch(*src) {
if(*sp == '\\'){ case '\\':
switch(*++sp){ switch(*++src) {
case '0': case '0':
case '1': case '1':
case '2': case '2':
case '3': case '3':
case '4': case '4':
case '5': case '5':
case '6': case '6':
case '7': case '7':
case '8': case '8':
case '9': case '9':
i = *sp-'0'; i = *src - '0';
if(mp!=0 && mp[i].sp != 0 && ms>i) if(match != nil && i < msize && match[i].ep != nil) {
for(ssp = mp[i].sp; c = *match[i].ep;
ssp < mp[i].ep; *match[i].ep = '\0';
ssp++) dst = strecpy(dst, ep+1, match[i].sp);
if(dp < ep) *match[i].ep = c;
*dp++ = *ssp;
break;
case '\\':
if(dp < ep)
*dp++ = '\\';
break;
case '\0':
sp--;
break;
default:
if(dp < ep)
*dp++ = *sp;
break;
} }
}else if(*sp == '&'){ break;
if(mp!=0 && mp[0].sp != 0 && ms>0) case '\\':
for(ssp = mp[0].sp; if(dst < ep)
ssp < mp[0].ep; ssp++) *dst++ = '\\';
if(dp < ep) else
*dp++ = *ssp; goto End;
}else{ break;
if(dp < ep) case '\0':
*dp++ = *sp; goto End;
default:
if(dst < ep)
*dst++ = *src;
else
goto End;
break;
} }
sp++; break;
case '&':
if(match != nil && msize > 0 && match[0].sp != nil) {
c = *match[0].ep;
*match[0].ep = '\0';
dst = strecpy(dst, ep+1, match[0].sp);
*match[0].ep = c;
}
break;
default:
if(dst < ep)
*dst++ = *src;
else
goto End;
break;
} }
*dp = '\0'; End:
*dst = '\0';
} }

View file

@ -1,212 +1,189 @@
#include <u.h> #include <u.h>
#include <libc.h> #include <libc.h>
#include "regexp.h" #include <regexp.h>
#include "regcomp.h" #include "regimpl.h"
/* typedef struct RethreadQ RethreadQ;
* return 0 if no match struct RethreadQ
* >0 if a match
* <0 if we ran out of _relist space
*/
static int
rregexec1(Reprog *progp, /* program to run */
Rune *bol, /* string to run machine on */
Resub *mp, /* subexpression elements */
int ms, /* number of elements at mp */
Reljunk *j)
{ {
int flag=0; Rethread *head;
Reinst *inst; Rethread **tail;
Relist *tlp; };
Rune *s;
int i, checkstart;
Rune r, *rp, *ep;
Relist* tl; /* This list, next list */
Relist* nl;
Relist* tle; /* ends of this and next list */
Relist* nle;
int match;
Rune *p;
match = 0; int
checkstart = j->startchar; rregexec(Reprog *prog, Rune *str, Resub *sem, int msize)
if(mp) {
for(i=0; i<ms; i++) { RethreadQ lists[2], *clist, *nlist, *tmp;
mp[i].rsp = 0; Rethread *t, *nextthr, **availthr;
mp[i].rep = 0; Reinst *curinst;
Rune *rsp, *rep, endr, last;
int i, match, first, gen, pri, matchpri;
if(msize > NSUBEXPM)
msize = NSUBEXPM;
if(prog->startinst->gen != 0) {
for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++)
curinst->gen = 0;
}
clist = lists;
clist->head = nil;
clist->tail = &clist->head;
nlist = lists + 1;
nlist->head = nil;
nlist->tail = &nlist->head;
for(i = 0; i < prog->nthr; i++)
prog->thrpool[i] = prog->threads + i;
availthr = prog->thrpool + prog->nthr;
pri = matchpri = gen = match = 0;
rsp = str;
rep = nil;
endr = L'\0';
if(sem != nil && msize > 0) {
if(sem->rsp != nil)
rsp = sem->rsp;
if(sem->rep != nil && *sem->rep != L'\0') {
rep = sem->rep;
endr = *sem->rep;
*sem->rep = '\0';
} }
j->relist[0][0].inst = 0; }
j->relist[1][0].inst = 0; last = 1;
for(; last != L'\0'; rsp++) {
/* Execute machine once for each character, including terminal NUL */ gen++;
s = j->rstarts; last = *rsp;
do{ first = 1;
/* fast check for first char */ t = clist->head;
if(checkstart) { if(t == nil)
switch(j->starttype) { goto Start;
case RUNE: curinst = t->pc;
p = runestrchr(s, j->startchar); Again:
if(p == 0 || s == j->reol) if(curinst->gen == gen)
return match; goto Done;
s = p; curinst->gen = gen;
break; switch(curinst->op) {
case BOL: case ORUNE:
if(s == bol) if(*rsp != curinst->r)
break; goto Done;
p = runestrchr(s, '\n'); case OANY: /* fallthrough */
if(p == 0 || s == j->reol) Any:
return match; nextthr = t->next;
s = p+1; t->pc = curinst + 1;
t->next = nil;
*nlist->tail = t;
nlist->tail = &t->next;
if(nextthr == nil)
break; break;
t = nextthr;
curinst = t->pc;
goto Again;
case OCLASS:
Class:
if(*rsp < curinst->r)
goto Done;
if(*rsp > curinst->r1) {
curinst++;
goto Class;
} }
} nextthr = t->next;
t->pc = curinst->a;
r = *s; t->next = nil;
*nlist->tail = t;
/* switch run lists */ nlist->tail = &t->next;
tl = j->relist[flag]; if(nextthr == nil)
tle = j->reliste[flag]; break;
nl = j->relist[flag^=1]; t = nextthr;
nle = j->reliste[flag]; curinst = t->pc;
nl->inst = 0; goto Again;
case ONOTNL:
/* Add first instruction to current list */ if(*rsp != L'\n') {
_rrenewemptythread(tl, progp->startinst, ms, s); curinst++;
goto Again;
/* Execute machine until current list is empty */ }
for(tlp=tl; tlp->inst; tlp++){ goto Done;
for(inst=tlp->inst; ; inst = inst->next){ case OBOL:
switch(inst->type){ if(rsp == str || rsp[-1] == L'\n') {
case RUNE: /* regular character */ curinst++;
if(inst->r == r) goto Again;
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) }
return -1; goto Done;
break; case OEOL:
case LBRA: if(*rsp == L'\0' && rep == nil) {
tlp->se.m[inst->subid].rsp = s; curinst++;
continue; goto Again;
case RBRA: }
tlp->se.m[inst->subid].rep = s; if(*rsp == '\n')
continue; goto Any;
case ANY: goto Done;
if(r != '\n') case OJMP:
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) curinst = curinst->a;
return -1; goto Again;
break; case OSPLIT:
case ANYNL: nextthr = *--availthr;
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) nextthr->pc = curinst->b;
return -1; if(msize > 0)
break; memcpy(nextthr->sem, t->sem, sizeof(Resub)*msize);
case BOL: nextthr->pri = t->pri;
if(s == bol || *(s-1) == '\n') nextthr->next = t->next;
continue; t->next = nextthr;
break; curinst = curinst->a;
case EOL: goto Again;
if(s == j->reol || r == 0 || r == '\n') case OSAVE:
continue; if(curinst->sub < msize)
break; t->sem[curinst->sub].rsp = rsp;
case CCLASS: curinst++;
ep = inst->cp->end; goto Again;
for(rp = inst->cp->spans; rp < ep; rp += 2) case OUNSAVE:
if(r >= rp[0] && r <= rp[1]){ if(curinst->sub == 0) {
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) /* "Highest" priority is the left-most longest. */
return -1; if (t->pri > matchpri)
break; goto Done;
} match = 1;
break; matchpri = t->pri;
case NCCLASS: if(sem != nil && msize > 0) {
ep = inst->cp->end; memcpy(sem, t->sem, sizeof(Resub)*msize);
for(rp = inst->cp->spans; rp < ep; rp += 2) sem->rep = rsp;
if(r >= rp[0] && r <= rp[1])
break;
if(rp == ep)
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
return -1;
break;
case OR:
/* evaluate right choice later */
if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
return -1;
/* efficiency: advance and re-evaluate */
continue;
case END: /* Match! */
match = 1;
tlp->se.m[0].rep = s;
if(mp != 0)
_renewmatch(mp, ms, &tlp->se);
break;
} }
break; goto Done;
} }
if(curinst->sub < msize)
t->sem[curinst->sub].rep = rsp;
curinst++;
goto Again;
Done:
*availthr++ = t;
t = t->next;
if(t == nil)
break;
curinst = t->pc;
goto Again;
} }
if(s == j->reol) Start:
/* Start again once if we haven't found anything. */
if(first == 1 && match == 0) {
first = 0;
t = *--availthr;
if(msize > 0)
memset(t->sem, 0, sizeof(Resub)*msize);
/* "Lower" priority thread */
t->pri = matchpri = pri++;
t->next = nil;
curinst = prog->startinst;
goto Again;
}
/* If we have a match and no extant threads, we are done. */
if(match == 1 && nlist->head == nil)
break; break;
checkstart = j->startchar && nl->inst==0; tmp = clist;
s++; clist = nlist;
}while(r); nlist = tmp;
nlist->head = nil;
nlist->tail = &nlist->head;
}
if(rep != nil)
*rep = endr;
return match; return match;
} }
/*
 * Second-chance Rune matcher: run rregexec1 again with two on-stack
 * thread lists of 5*LISTSIZE entries each.  The list pointers and their
 * end markers (two entries before the end of each list) are stored into
 * *j before the run.  Returns whatever rregexec1 returns.
 */
static int
rregexec2(Reprog *progp,	/* program to run */
	Rune *bol,	/* string to run machine on */
	Resub *mp,	/* subexpression elements */
	int ms,		/* number of elements at mp */
	Reljunk *j
)
{
	Relist big[2][5*LISTSIZE];
	int i;

	/* mark space */
	for(i = 0; i < 2; i++){
		j->relist[i] = big[i];
		j->reliste[i] = big[i] + 5*LISTSIZE - 2;
	}
	return rregexec1(progp, bol, mp, ms, j);
}
/*
 * Match the compiled program progp against the Rune string bol.
 *
 * If mp is non-nil and ms > 0, a non-nil mp->rsp overrides the starting
 * position and a non-nil mp->rep sets the end position handed to the
 * matcher.  The type of the program's first instruction is recorded in
 * the Reljunk (a RUNE below Runeself, or BOL) before matching.
 *
 * The match is first attempted with two on-stack lists of LISTSIZE
 * entries; if rregexec1 reports a negative result the attempt is
 * repeated through rregexec2.  Returns the non-negative matcher result,
 * or -1 if both attempts fail.
 */
extern int
rregexec(Reprog *progp,	/* program to run */
	Rune *bol,	/* string to run machine on */
	Resub *mp,	/* subexpression elements */
	int ms)		/* number of elements at mp */
{
	Reljunk j;
	Relist relist0[LISTSIZE], relist1[LISTSIZE];
	int rv;

	/*
	 * use user-specified starting/ending location if specified;
	 * test the Rune members of the Resub unions, the same ones read below,
	 * rather than their char* aliases
	 */
	j.rstarts = bol;
	j.reol = 0;
	if(mp && ms>0){
		if(mp->rsp)
			j.rstarts = mp->rsp;
		if(mp->rep)
			j.reol = mp->rep;
	}
	j.starttype = 0;
	j.startchar = 0;
	if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
		j.starttype = RUNE;
		j.startchar = progp->startinst->r;
	}
	if(progp->startinst->type == BOL)
		j.starttype = BOL;

	/* mark space */
	j.relist[0] = relist0;
	j.relist[1] = relist1;
	j.reliste[0] = relist0 + nelem(relist0) - 2;
	j.reliste[1] = relist1 + nelem(relist1) - 2;

	/* first try the small lists; a negative result triggers the retry */
	rv = rregexec1(progp, bol, mp, ms, &j);
	if(rv >= 0)
		return rv;
	rv = rregexec2(progp, bol, mp, ms, &j);
	if(rv >= 0)
		return rv;
	return -1;
}

View file

@ -1,64 +1,66 @@
#include <u.h> #include <u.h>
#include <libc.h> #include <libc.h>
#include "regexp.h" #include <regexp.h>
/* substitute into one string using the matches from the last regexec() */ void
extern void rregsub(Rune *src, Rune *dst, int dlen, Resub *match, int msize)
rregsub(Rune *sp, /* source string */
Rune *dp, /* destination string */
int dlen,
Resub *mp, /* subexpression elements */
int ms) /* number of elements pointed to by mp */
{ {
Rune *ssp, *ep;
int i; int i;
Rune *ep, r;
ep = dp+(dlen/sizeof(Rune))-1; ep = dst + dlen-1;
while(*sp != '\0'){ for(;*src != L'\0'; src++) switch(*src) {
if(*sp == '\\'){ case L'\\':
switch(*++sp){ switch(*++src) {
case '0': case L'0':
case '1': case L'1':
case '2': case L'2':
case '3': case L'3':
case '4': case L'4':
case '5': case L'5':
case '6': case L'6':
case '7': case L'7':
case '8': case L'8':
case '9': case L'9':
i = *sp-'0'; i = *src - L'0';
if(mp[i].rsp != 0 && mp!=0 && ms>i) if(match != nil && i < msize && match[i].rsp != nil) {
for(ssp = mp[i].rsp; r = *match[i].rep;
ssp < mp[i].rep; *match[i].rep = L'\0';
ssp++) dst = runestrecpy(dst, ep+1, match[i].rsp);
if(dp < ep) *match[i].rep = r;
*dp++ = *ssp;
break;
case '\\':
if(dp < ep)
*dp++ = '\\';
break;
case '\0':
sp--;
break;
default:
if(dp < ep)
*dp++ = *sp;
break;
} }
}else if(*sp == '&'){ break;
if(mp[0].rsp != 0 && mp!=0 && ms>0) case L'\\':
if(mp[0].rsp != 0) if(dst < ep)
for(ssp = mp[0].rsp; *dst++ = L'\\';
ssp < mp[0].rep; ssp++) else
if(dp < ep) goto End;
*dp++ = *ssp; break;
}else{ case L'\0':
if(dp < ep) goto End;
*dp++ = *sp; default:
if(dst < ep)
*dst++ = *src;
else
goto End;
break;
} }
sp++; break;
case L'&':
if(match != nil && msize > 0 && match[0].rsp != nil) {
r = *match[0].rep;
*match[0].rep = L'\0';
dst = runestrecpy(dst, ep+1, match[0].rsp);
*match[0].rep = r;
}
break;
default:
if(dst < ep)
*dst++ = *src;
else
goto End;
break;
} }
*dp = '\0'; End:
*dst = L'\0';
} }