plan9fox/sys/src/cmd/dict/dict.c
Ori Bernstein 5bc9b0c3ca improve usage messages (thanks henesy)
Fix inconsistencies between programs and their usage
messages,  correct instances where information seems
to be missing or lost. This  includes missing arguments,
making usage consistent with manuals, and so on.
2020-03-10 10:09:34 -07:00

668 lines
12 KiB
C

#include <u.h>
#include <libc.h>
#include <bio.h>
#include <regexp.h>
#include <ctype.h>
#include "dict.h"
/*
* Assumed index file structure: lines of form
* [^\t]+\t[0-9]+
* First field is key, second is byte offset into dictionary.
* Should be sorted with args -u -t' ' +0f -1 +0 -1 +1n -2
*/
typedef struct Addr Addr;
struct Addr {
int n; /* number of offsets */
int cur; /* current position within doff array */
int maxn; /* actual current size of doff array */
ulong doff[1]; /* doff[maxn], with 0..n-1 significant */
};
Biobuf binbuf;
Biobuf boutbuf;
Biobuf *bin = &binbuf; /* user cmd input */
Biobuf *bout = &boutbuf; /* output */
Biobuf *bdict; /* dictionary */
Biobuf *bindex; /* index file */
long indextop; /* index offset at end of file */
int lastcmd; /* last executed command */
Addr *dot; /* "current" address */
Dict *dict; /* current dictionary */
int linelen;
int breaklen = 60;
int outinhibit;
int debug;
void execcmd(int);
int getpref(char*, Rune*);
Entry getentry(int);
int getfield(Rune*);
long locate(Rune*);
int parseaddr(char*, char**);
int parsecmd(char*);
int search(char*, int);
long seeknextline(Biobuf*, long);
void setdotnext(void);
void setdotprev(void);
void sortaddr(Addr*);
void usage(void);
enum {
Plen=300, /* max length of a search pattern */
Fieldlen=200, /* max length of an index field */
Aslots=10, /* initial number of slots in an address */
};
void
main(int argc, char **argv)
{
int i, cmd, kflag;
char *line, *p;
Binit(&binbuf, 0, OREAD);
Binit(&boutbuf, 1, OWRITE);
kflag = 0;
line = 0;
dict = 0;
for(i=0; dicts[i].name; i++){
if(access(dicts[i].path, 0)>=0 && access(dicts[i].indexpath, 0)>=0){
dict = &dicts[i];
break;
}
}
ARGBEGIN {
case 'd':
p = EARGF(usage());
dict = 0;
for(i=0; dicts[i].name; i++)
if(strcmp(p, dicts[i].name)==0) {
dict = &dicts[i];
break;
}
if(!dict)
usage();
break;
case 'c':
line = EARGF(usage());
break;
case 'k':
kflag++;
break;
case 'D':
debug++;
break;
default:
usage();
}ARGEND
if(dict == 0){
err("no dictionaries present on this system");
exits("nodict");
}
if(kflag) {
(*dict->printkey)();
exits(0);
}
if(argc > 1)
usage();
else if(argc == 1) {
if(line)
usage();
p = argv[0];
line = malloc(strlen(p)+5);
sprint(line, "/%s/P\n", p);
}
bdict = Bopen(dict->path, OREAD);
if(!bdict) {
err("can't open dictionary %s", dict->path);
exits("nodict");
}
bindex = Bopen(dict->indexpath, OREAD);
if(!bindex) {
err("can't open index %s", dict->indexpath);
exits("noindex");
}
indextop = Bseek(bindex, 0L, 2);
dot = malloc(sizeof(Addr)+(Aslots-1)*sizeof(ulong));
dot->n = 0;
dot->cur = 0;
dot->maxn = Aslots;
lastcmd = 0;
if(line) {
cmd = parsecmd(line);
if(cmd)
execcmd(cmd);
} else {
for(;;) {
Bprint(bout, "*");
Bflush(bout);
line = Brdline(bin, '\n');
linelen = 0;
if(!line)
break;
cmd = parsecmd(line);
if(cmd) {
execcmd(cmd);
lastcmd = cmd;
}
}
}
exits(0);
}
void
usage(void)
{
int i;
char *a, *b;
Bprint(bout, "usage: %s [-k] [-d dict] [-c cmd] [pattern]\n", argv0);
Bprint(bout, "dictionaries (brackets mark dictionaries not present on this system):\n");
for(i = 0; dicts[i].name; i++){
a = b = "";
if(access(dicts[i].path, 0)<0 || access(dicts[i].indexpath, 0)<0){
a = "[";
b = "]";
}
Bprint(bout, " %s%s\t%s%s\n", a, dicts[i].name, dicts[i].desc, b);
}
exits("usage");
}
int
parsecmd(char *line)
{
char *e;
int cmd, ans;
if(parseaddr(line, &e) >= 0)
line = e;
else
return 0;
cmd = *line;
ans = cmd;
if(isupper(cmd))
cmd = tolower(cmd);
if(!(cmd == 'a' || cmd == 'h' || cmd == 'p' || cmd == 'r' ||
cmd == '\n')) {
err("unknown command %c", cmd);
return 0;
}
if(cmd == '\n')
switch(lastcmd) {
case 0: ans = 'H'; break;
case 'H': ans = 'p'; break;
default : ans = lastcmd; break;
}
else if(line[1] != '\n' && line[1] != 0)
err("extra stuff after command %c ignored", cmd);
return ans;
}
void
execcmd(int cmd)
{
Entry e;
int cur, doall;
if(isupper(cmd)) {
doall = 1;
cmd = tolower(cmd);
cur = 0;
} else {
doall = 0;
cur = dot->cur;
}
if(debug && doall && cmd == 'a')
Bprint(bout, "%d entries, cur=%d\n", dot->n, cur+1);
for(;;){
if(cur >= dot->n)
break;
if(doall) {
Bprint(bout, "%d\t", cur+1);
linelen += 4 + (cur >= 10);
}
switch(cmd) {
case 'a':
Bprint(bout, "#%lud\n", dot->doff[cur]);
break;
case 'h':
case 'p':
case 'r':
e = getentry(cur);
(*dict->printentry)(e, cmd);
break;
}
cur++;
if(doall) {
if(cmd == 'p' || cmd == 'r') {
Bputc(bout, '\n');
linelen = 0;
}
} else
break;
}
if(cur >= dot->n)
cur = 0;
dot->cur = cur;
}
/*
* Address syntax: ('.' | '/' re '/' | '!' re '!' | number | '#' number) ('+' | '-')*
* Answer goes in dot.
* Return -1 if address starts, but get error.
* Return 0 if no address.
*/
int
parseaddr(char *line, char **eptr)
{
int delim, plen;
ulong v;
char *e;
char pat[Plen];
if(*line == '/' || *line == '!') {
/* anchored regular expression match; '!' means no folding */
if(*line == '/') {
delim = '/';
e = strpbrk(line+1, "/\n");
} else {
delim = '!';
e = strpbrk(line+1, "!\n");
}
plen = e-line-1;
if(plen >= Plen-3) {
err("pattern too big");
return -1;
}
pat[0] = '^';
memcpy(pat+1, line+1, plen);
pat[plen+1] = '$';
pat[plen+2] = 0;
if(*e == '\n')
line = e;
else
line = e+1;
if(!search(pat, delim == '/')) {
err("pattern not found");
return -1;
}
} else if(*line == '#') {
/* absolute byte offset into dictionary */
line++;
if(!isdigit(*line))
return -1;
v = strtoul(line, &e, 10);
line = e;
dot->doff[0] = v;
dot->n = 1;
dot->cur = 0;
} else if(isdigit(*line)) {
v = strtoul(line, &e, 10);
line = e;
if(v < 1 || v > dot->n)
err(".%d not in range [1,%d], ignored",
v, dot->n);
else
dot->cur = v-1;
} else if(*line == '.') {
line++;
} else {
*eptr = line;
return 0;
}
while(*line == '+' || *line == '-') {
if(*line == '+')
setdotnext();
else
setdotprev();
line++;
}
*eptr = line;
return 1;
}
/*
* Index file is sorted by folded field1.
* Method: find pre, a folded prefix of r.e. pat,
* and then low = offset to beginning of
* line in index file where first match of prefix occurs.
* Then go through index until prefix no longer matches,
* adding each line that matches real pattern to dot.
* Finally, sort dot offsets (uniquing).
* We know pat len < Plen, and that it is surrounded by ^..$
*/
int
search(char *pat, int dofold)
{
int needre, prelen, match, n;
Reprog *re;
long ioff, v;
Rune pre[Plen];
Rune lit[Plen];
Rune entry[Fieldlen];
char fpat[Plen];
prelen = getpref(pat+1, pre);
if(pat[prelen+1] == 0 || pat[prelen+1] == '$') {
runestrcpy(lit, pre);
if(dofold)
fold(lit);
needre = 0;
SET(re);
} else {
needre = 1;
if(dofold) {
foldre(fpat, pat);
re = regcomp(fpat);
} else
re = regcomp(pat);
}
fold(pre);
ioff = locate(pre);
if(ioff < 0)
return 0;
dot->n = 0;
Bseek(bindex, ioff, 0);
for(;;) {
if(!getfield(entry))
break;
if(dofold)
fold(entry);
if(needre)
match = rregexec(re, entry, 0, 0);
else
match = (acomp(lit, entry) == 0);
if(match) {
if(!getfield(entry))
break;
v = runetol(entry);
if(dot->n >= dot->maxn) {
n = 2*dot->maxn;
dot = realloc(dot,
sizeof(Addr)+(n-1)*sizeof(long));
if(dot == nil)
sysfatal("realloc: %r");
dot->maxn = n;
}
dot->doff[dot->n++] = v;
} else {
if(!dofold)
fold(entry);
if(*pre) {
n = acomp(pre, entry);
if(n < -1 || (!needre && n < 0))
break;
}
/* get to next index entry */
if(!getfield(entry))
break;
}
}
sortaddr(dot);
dot->cur = 0;
return dot->n;
}
/*
* Return offset in index file of first line whose folded
* first field has pre as a prefix. -1 if none found.
*/
long
locate(Rune *pre)
{
long top, bot, mid;
Rune entry[Fieldlen];
if(*pre == 0)
return 0;
bot = 0;
top = indextop;
if(debug>1)
fprint(2, "locate looking for prefix %S\n", pre);
for(;;) {
/*
* Loop invariant: foldkey(bot) < pre <= foldkey(top)
* and bot < top, and bot,top point at beginning of lines
*/
mid = (top+bot) / 2;
mid = seeknextline(bindex, mid);
if(debug > 1)
fprint(2, "bot=%ld, mid=%ld->%ld, top=%ld\n",
bot, (top+bot) / 2, mid, top);
if(mid == top || !getfield(entry))
break;
if(debug > 1)
fprint(2, "key=%S\n", entry);
/*
* here mid is strictly between bot and top
*/
fold(entry);
if(acomp(pre, entry) <= 0)
top = mid;
else
bot = mid;
}
/*
* bot < top, but they don't necessarily point at successive lines
* Use linear search from bot to find first line that pre is a
* prefix of
*/
while((bot = seeknextline(bindex, bot)) <= top) {
if(!getfield(entry))
return -1;
if(debug > 1)
fprint(2, "key=%S\n", entry);
fold(entry);
switch(acomp(pre, entry)) {
case -2:
return -1;
case -1:
case 0:
return bot;
case 1:
case 2:
continue;
}
}
return -1;
}
/*
* Get prefix of non re-metacharacters, runified, into pre,
* and return length
*/
int
getpref(char *pat, Rune *pre)
{
int n, r;
char *p;
p = pat;
while(*p) {
n = chartorune(pre, p);
r = *pre;
switch(r) {
case L'.': case L'*': case L'+': case L'?':
case L'[': case L']': case L'(': case ')':
case L'|': case L'^': case L'$':
*pre = 0;
return p-pat;
case L'\\':
p += n;
p += chartorune(++pre, p);
pre++;
break;
default:
p += n;
pre++;
}
}
return p-pat;
}
long
seeknextline(Biobuf *b, long off)
{
long c;
Bseek(b, off, 0);
do {
c = Bgetrune(b);
} while(c>=0 && c!='\n');
return Boffset(b);
}
/*
* Get next field out of index file (either tab- or nl- terminated)
* Answer in *rp, assumed to be Fieldlen long.
* Return 0 if read error first.
*/
int
getfield(Rune *rp)
{
long c;
int n;
for(n=Fieldlen; n-- > 0; ) {
if ((c = Bgetrune(bindex)) < 0)
return 0;
if(c == '\t' || c == '\n') {
*rp = L'\0';
return 1;
}
*rp++ = c;
}
err("word too long");
return 0;
}
/*
* A compare longs function suitable for qsort
*/
static int
longcmp(void *av, void *bv)
{
long v;
long *a, *b;
a = av;
b = bv;
v = *a - *b;
if(v < 0)
return -1;
else if(v == 0)
return 0;
else
return 1;
}
void
sortaddr(Addr *a)
{
int i, j;
long v;
if(a->n <= 1)
return;
qsort(a->doff, a->n, sizeof(long), longcmp);
/* remove duplicates */
for(i=0, j=0; j < a->n; j++) {
v = a->doff[j];
if(i > 0 && v == a->doff[i-1])
continue;
a->doff[i++] = v;
}
a->n = i;
}
Entry
getentry(int i)
{
long b, e, n;
static Entry ans;
static int anslen = 0;
b = dot->doff[i];
e = (*dict->nextoff)(b+1);
ans.doff = b;
if(e < 0) {
err("couldn't seek to entry");
ans.start = 0;
ans.end = 0;
} else {
n = e-b;
if(n+1 > anslen) {
ans.start = realloc(ans.start, n+1);
if(ans.start == nil)
sysfatal("realloc: %r");
anslen = n+1;
}
Bseek(bdict, b, 0);
n = Bread(bdict, ans.start, n);
ans.end = ans.start + n;
*ans.end = 0;
}
return ans;
}
void
setdotnext(void)
{
long b;
b = (*dict->nextoff)(dot->doff[dot->cur]+1);
if(b < 0) {
err("couldn't find a next entry");
return;
}
dot->doff[0] = b;
dot->n = 1;
dot->cur = 0;
}
void
setdotprev(void)
{
int tryback;
long here, last, p;
if(dot->cur < 0 || dot->cur >= dot->n)
return;
tryback = 2000;
here = dot->doff[dot->cur];
last = 0;
while(last == 0) {
p = here - tryback;
if(p < 0)
p = 0;
for(;;) {
p = (*dict->nextoff)(p+1);
if(p < 0)
return; /* shouldn't happen */
if(p >= here)
break;
last = p;
}
if(!last) {
if(here - tryback < 0) {
err("can't find a previous entry");
return;
}
tryback = 2*tryback;
}
}
dot->doff[0] = last;
dot->n = 1;
dot->cur = 0;
}