mothra: ignore http content-type and encoding and just do content sniffing
This commit is contained in:
parent
001ce57253
commit
85d6170c35
8 changed files with 90 additions and 185 deletions
|
@ -1,106 +0,0 @@
|
||||||
#include <u.h>
|
|
||||||
#include <libc.h>
|
|
||||||
#include <draw.h>
|
|
||||||
#include <event.h>
|
|
||||||
#include <panel.h>
|
|
||||||
#include <ctype.h>
|
|
||||||
#include "mothra.h"
|
|
||||||
/*
 * One row of a name -> type lookup table.
 * A table is terminated by a row whose name is 0; the kind stored
 * in that terminating row is what the lookup routines return when
 * no earlier row matches.
 */
typedef struct Kind{
	char *name;	/* key string; 0 marks the end of the table */
	int kind;	/* type code associated with name */
}Kind;
|
|
||||||
/*
 * Find the row of table k whose name equals s, ignoring case,
 * and return its kind.  When the terminating row (name == 0) is
 * reached without a match, that row's kind is returned instead.
 */
int klook(char *s, Kind *k){
	for(; k->name != 0; k++)
		if(cistrcmp(k->name, s) == 0)
			break;
	return k->kind;
}
|
|
||||||
Kind suffix[]={
|
|
||||||
".html", HTML,
|
|
||||||
".htm", HTML,
|
|
||||||
"/", HTML,
|
|
||||||
".gif", GIF,
|
|
||||||
".jpe", JPEG,
|
|
||||||
".jpg", JPEG,
|
|
||||||
".jpeg", JPEG,
|
|
||||||
".png", PNG,
|
|
||||||
".pic", PIC,
|
|
||||||
".au", AUDIO,
|
|
||||||
".tif", TIFF,
|
|
||||||
".tiff", TIFF,
|
|
||||||
".xbm", XBM,
|
|
||||||
".txt", PLAIN,
|
|
||||||
".text", PLAIN,
|
|
||||||
".ai", POSTSCRIPT,
|
|
||||||
".eps", POSTSCRIPT,
|
|
||||||
".ps", POSTSCRIPT,
|
|
||||||
".pdf", PDF,
|
|
||||||
".zip", ZIP,
|
|
||||||
0, HTML
|
|
||||||
};
|
|
||||||
/*
 * Return the kind of the first row in table k whose name matches
 * the tail of s, ignoring case.  len is the length of s; the
 * comparison runs from s+len-strlen(name) up to the terminating
 * NUL, so s is expected to end at s[len].  Rows longer than s are
 * skipped.  If nothing matches, the kind of the terminating row
 * (name == 0) is returned.
 */
int suflook(char *s, int len, Kind *k){
	int n;

	for(; k->name != 0; k++){
		n = strlen(k->name);
		if(n > len)
			continue;
		if(cistrcmp(k->name, s+len-n) == 0)
			return k->kind;
	}
	return k->kind;
}
|
|
||||||
int suffix2type(char *name){
|
|
||||||
int len, kind, restore;
|
|
||||||
char *s;
|
|
||||||
len=strlen(name);
|
|
||||||
if(len>=2 && cistrcmp(name+len-2, ".Z")==0){
|
|
||||||
kind=COMPRESS;
|
|
||||||
len-=2;
|
|
||||||
}
|
|
||||||
else if(len>=3 && cistrcmp(name+len-3, ".gz")==0){
|
|
||||||
kind=GUNZIP;
|
|
||||||
len-=3;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
kind=0;
|
|
||||||
restore=name[len];
|
|
||||||
name[len]='\0';
|
|
||||||
for(s=name+len;s!=name && *s!='.';--s);
|
|
||||||
kind|=suflook(name, len, suffix);
|
|
||||||
name[len]=restore;
|
|
||||||
return kind;
|
|
||||||
}
|
|
||||||
Kind content[]={
|
|
||||||
"text/html", HTML,
|
|
||||||
"text/x-html", HTML,
|
|
||||||
"application/html", HTML,
|
|
||||||
"application/x-html", HTML,
|
|
||||||
"text/plain", PLAIN,
|
|
||||||
"image/gif", GIF,
|
|
||||||
"image/jpeg", JPEG,
|
|
||||||
"image/pjpeg", JPEG,
|
|
||||||
"image/png", PNG,
|
|
||||||
"image/tiff", TIFF,
|
|
||||||
"image/x-xbitmap", XBM,
|
|
||||||
"image/x-bitmap", XBM,
|
|
||||||
"image/xbitmap", XBM,
|
|
||||||
"application/postscript", POSTSCRIPT,
|
|
||||||
"application/pdf", PDF,
|
|
||||||
"application/octet-stream", SUFFIX,
|
|
||||||
"application/zip", ZIP,
|
|
||||||
0, SUFFIX
|
|
||||||
};
|
|
||||||
int content2type(char *s, char *name){
|
|
||||||
int type;
|
|
||||||
type=klook(s, content);
|
|
||||||
if(type==SUFFIX) type=suffix2type(name);
|
|
||||||
return type;
|
|
||||||
}
|
|
||||||
Kind encoding[]={
|
|
||||||
"x-compress", COMPRESS,
|
|
||||||
"compress", COMPRESS,
|
|
||||||
"x-gzip", GUNZIP,
|
|
||||||
"gzip", GUNZIP,
|
|
||||||
0, 0
|
|
||||||
};
|
|
||||||
int encoding2type(char *s){
|
|
||||||
return klook(s, encoding);
|
|
||||||
}
|
|
|
@ -18,9 +18,7 @@ char *pixcmd[]={
|
||||||
[GIF] "gif -9t",
|
[GIF] "gif -9t",
|
||||||
[JPEG] "jpg -9t",
|
[JPEG] "jpg -9t",
|
||||||
[PNG] "png -9t",
|
[PNG] "png -9t",
|
||||||
[PIC] "fb/3to1 /lib/fb/cmap/rgbv",
|
[BMP] "bmp -9t",
|
||||||
[TIFF] "/sys/lib/mothra/tiffcvt",
|
|
||||||
[XBM] "fb/xbm2pic",
|
|
||||||
};
|
};
|
||||||
|
|
||||||
void storebitmap(Rtext *t, Image *b){
|
void storebitmap(Rtext *t, Image *b){
|
||||||
|
@ -34,7 +32,7 @@ void getimage(Rtext *t, Www *w){
|
||||||
Action *ap;
|
Action *ap;
|
||||||
Url url;
|
Url url;
|
||||||
Image *b;
|
Image *b;
|
||||||
int fd;
|
int fd, typ;
|
||||||
char err[512];
|
char err[512];
|
||||||
Pix *p;
|
Pix *p;
|
||||||
|
|
||||||
|
@ -56,17 +54,12 @@ void getimage(Rtext *t, Www *w){
|
||||||
close(fd);
|
close(fd);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if(url.type!=GIF
|
typ = snooptype(fd);
|
||||||
&& url.type!=JPEG
|
if(typ < 0 || typ >= nelem(pixcmd) || pixcmd[typ] == nil){
|
||||||
&& url.type!=PNG
|
|
||||||
&& url.type!=PIC
|
|
||||||
&& url.type!=TIFF
|
|
||||||
&& url.type!=XBM){
|
|
||||||
werrstr("unknown image type");
|
werrstr("unknown image type");
|
||||||
goto Err;
|
goto Err;
|
||||||
}
|
}
|
||||||
|
if((fd = pipeline(pixcmd[typ], fd)) < 0)
|
||||||
if((fd = pipeline(pixcmd[url.type], fd)) < 0)
|
|
||||||
goto Err;
|
goto Err;
|
||||||
if(ap->width>0 || ap->height>0){
|
if(ap->width>0 || ap->height>0){
|
||||||
char buf[80];
|
char buf[80];
|
||||||
|
|
|
@ -70,7 +70,6 @@ struct Hglob{
|
||||||
char *etext; /* end of text buffer */
|
char *etext; /* end of text buffer */
|
||||||
Form *form; /* data for form under construction */
|
Form *form; /* data for form under construction */
|
||||||
Www *dst; /* where the text goes */
|
Www *dst; /* where the text goes */
|
||||||
char charset[NNAME];
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
TARG=mothra
|
TARG=mothra
|
||||||
LIB=libpanel/libpanel.$O.a
|
LIB=libpanel/libpanel.$O.a
|
||||||
CFILES= \
|
CFILES= \
|
||||||
filetype.c \
|
snoop.c \
|
||||||
forms.c \
|
forms.c \
|
||||||
getpix.c \
|
getpix.c \
|
||||||
html.syntax.c \
|
html.syntax.c \
|
||||||
|
|
|
@ -30,7 +30,6 @@ Url defurl={
|
||||||
"http://cat-v.org/",
|
"http://cat-v.org/",
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
HTML,
|
|
||||||
};
|
};
|
||||||
Url badurl={
|
Url badurl={
|
||||||
"",
|
"",
|
||||||
|
@ -38,7 +37,6 @@ Url badurl={
|
||||||
"No file loaded",
|
"No file loaded",
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
HTML,
|
|
||||||
};
|
};
|
||||||
Cursor patientcurs={
|
Cursor patientcurs={
|
||||||
0, 0,
|
0, 0,
|
||||||
|
@ -816,7 +814,6 @@ int fileurlopen(Url *url){
|
||||||
memset(url->fullname, 0, sizeof(url->fullname));
|
memset(url->fullname, 0, sizeof(url->fullname));
|
||||||
strcpy(url->fullname, "file:");
|
strcpy(url->fullname, "file:");
|
||||||
fd2path(fd, url->fullname+5, sizeof(url->fullname)-6);
|
fd2path(fd, url->fullname+5, sizeof(url->fullname)-6);
|
||||||
url->type = content2type("application/octet-stream", url->fullname);
|
|
||||||
return fd;
|
return fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -870,20 +867,6 @@ int urlopen(Url *url, int method, char *body){
|
||||||
snprint(buf, sizeof buf, "%s/%d/parsed", mtpt, conn);
|
snprint(buf, sizeof buf, "%s/%d/parsed", mtpt, conn);
|
||||||
readstr(url->fullname, sizeof(url->fullname), buf, "url");
|
readstr(url->fullname, sizeof(url->fullname), buf, "url");
|
||||||
readstr(url->tag, sizeof(url->tag), buf, "fragment");
|
readstr(url->tag, sizeof(url->tag), buf, "fragment");
|
||||||
|
|
||||||
snprint(buf, sizeof buf, "%s/%d", mtpt, conn);
|
|
||||||
readstr(buf, sizeof buf, buf, "contenttype");
|
|
||||||
url->charset[0] = 0;
|
|
||||||
if(p = cistrstr(buf, "charset=")){
|
|
||||||
p += 8;
|
|
||||||
strncpy(url->charset, p, sizeof(url->charset));
|
|
||||||
if(p = strchr(url->charset, ';'))
|
|
||||||
*p = 0;
|
|
||||||
}
|
|
||||||
if(p = strchr(buf, ';'))
|
|
||||||
*p = 0;
|
|
||||||
url->type = content2type(buf, url->fullname);
|
|
||||||
|
|
||||||
close(ctlfd);
|
close(ctlfd);
|
||||||
return fd;
|
return fd;
|
||||||
}
|
}
|
||||||
|
@ -931,9 +914,7 @@ void seturl(Url *url, char *urlname, char *base){
|
||||||
strncpy(url->reltext, urlname, sizeof(url->reltext));
|
strncpy(url->reltext, urlname, sizeof(url->reltext));
|
||||||
strncpy(url->basename, base, sizeof(url->basename));
|
strncpy(url->basename, base, sizeof(url->basename));
|
||||||
url->fullname[0] = 0;
|
url->fullname[0] = 0;
|
||||||
url->charset[0] = 0;
|
|
||||||
url->tag[0] = 0;
|
url->tag[0] = 0;
|
||||||
url->type = 0;
|
|
||||||
url->map = 0;
|
url->map = 0;
|
||||||
}
|
}
|
||||||
Url *copyurl(Url *u){
|
Url *copyurl(Url *u){
|
||||||
|
@ -951,7 +932,7 @@ void freeurl(Url *u){
|
||||||
* get the file at the given url
|
* get the file at the given url
|
||||||
*/
|
*/
|
||||||
void geturl(char *urlname, int method, char *body, int cache, int map){
|
void geturl(char *urlname, int method, char *body, int cache, int map){
|
||||||
int i, fd;
|
int i, fd, typ;
|
||||||
char cmd[NNAME];
|
char cmd[NNAME];
|
||||||
int pfd[2];
|
int pfd[2];
|
||||||
Www *w;
|
Www *w;
|
||||||
|
@ -968,18 +949,17 @@ void geturl(char *urlname, int method, char *body, int cache, int map){
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
message("getting %s", selection->fullname);
|
message("getting %s", selection->fullname);
|
||||||
if(selection->type&COMPRESS)
|
typ = snooptype(fd);
|
||||||
fd=pipeline("/bin/uncompress", fd);
|
if(typ == GUNZIP){
|
||||||
else if(selection->type&GUNZIP)
|
|
||||||
fd=pipeline("/bin/gunzip", fd);
|
fd=pipeline("/bin/gunzip", fd);
|
||||||
switch(selection->type&~COMPRESSION){
|
typ = snooptype(fd);
|
||||||
|
}
|
||||||
|
switch(typ){
|
||||||
default:
|
default:
|
||||||
message("Bad type %x in geturl", selection->type);
|
message("Bad type %x in geturl", typ);
|
||||||
break;
|
break;
|
||||||
case HTML:
|
case HTML:
|
||||||
snprint(cmd, sizeof(cmd), selection->charset[0] ?
|
fd = pipeline("/bin/uhtml", fd);
|
||||||
"/bin/uhtml -c %s" : "/bin/uhtml", selection->charset);
|
|
||||||
fd = pipeline(cmd, fd);
|
|
||||||
case PLAIN:
|
case PLAIN:
|
||||||
w = www(i = wwwtop++);
|
w = www(i = wwwtop++);
|
||||||
if(i >= NWWW){
|
if(i >= NWWW){
|
||||||
|
@ -1005,24 +985,18 @@ void geturl(char *urlname, int method, char *body, int cache, int map){
|
||||||
w->url=copyurl(selection);
|
w->url=copyurl(selection);
|
||||||
w->finished = 0;
|
w->finished = 0;
|
||||||
w->alldone = 0;
|
w->alldone = 0;
|
||||||
gettext(w, fd, selection->type&~COMPRESSION);
|
gettext(w, fd, typ);
|
||||||
plinitlist(list, PACKN|FILLX, genwww, 8, doprev);
|
plinitlist(list, PACKN|FILLX, genwww, 8, doprev);
|
||||||
if(defdisplay) pldraw(list, screen);
|
if(defdisplay) pldraw(list, screen);
|
||||||
setcurrent(i, selection->tag);
|
setcurrent(i, selection->tag);
|
||||||
break;
|
break;
|
||||||
case POSTSCRIPT:
|
|
||||||
case GIF:
|
case GIF:
|
||||||
case JPEG:
|
case JPEG:
|
||||||
case PNG:
|
case PNG:
|
||||||
case PDF:
|
case BMP:
|
||||||
|
case PAGE:
|
||||||
filter("page -w", fd);
|
filter("page -w", fd);
|
||||||
break;
|
break;
|
||||||
case TIFF:
|
|
||||||
filter("/sys/lib/mothra/tiffview", fd);
|
|
||||||
break;
|
|
||||||
case XBM:
|
|
||||||
filter("fb/xbm2pic|fb/9v", fd);
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,8 +27,6 @@ struct Url{
|
||||||
char basename[NNAME];
|
char basename[NNAME];
|
||||||
char reltext[NNAME];
|
char reltext[NNAME];
|
||||||
char tag[NNAME];
|
char tag[NNAME];
|
||||||
char charset[NNAME];
|
|
||||||
int type;
|
|
||||||
int map; /* is this an image map? */
|
int map; /* is this an image map? */
|
||||||
};
|
};
|
||||||
struct Www{
|
struct Www{
|
||||||
|
@ -43,29 +41,15 @@ struct Www{
|
||||||
int alldone; /* page will not change further -- used to adjust cursor */
|
int alldone; /* page will not change further -- used to adjust cursor */
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
* url reference types -- COMPRESS and GUNZIP are flags that can modify any other type
|
|
||||||
* Changing these in a non-downward compatible way spoils cache entries
|
|
||||||
*/
|
|
||||||
enum{
|
enum{
|
||||||
GIF=1,
|
|
||||||
HTML,
|
|
||||||
JPEG,
|
|
||||||
PIC,
|
|
||||||
TIFF,
|
|
||||||
AUDIO,
|
|
||||||
PLAIN,
|
PLAIN,
|
||||||
XBM,
|
HTML,
|
||||||
POSTSCRIPT,
|
GIF,
|
||||||
FORWARD,
|
JPEG,
|
||||||
PDF,
|
|
||||||
SUFFIX,
|
|
||||||
ZIP,
|
|
||||||
PNG,
|
PNG,
|
||||||
|
BMP,
|
||||||
COMPRESS=16,
|
GUNZIP,
|
||||||
GUNZIP=32,
|
PAGE,
|
||||||
COMPRESSION=16+32,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -102,9 +86,7 @@ void *emalloc(int);
|
||||||
void *emallocz(int, int);
|
void *emallocz(int, int);
|
||||||
void setbitmap(Rtext *);
|
void setbitmap(Rtext *);
|
||||||
void message(char *, ...);
|
void message(char *, ...);
|
||||||
int suffix2type(char *);
|
int snooptype(int fd);
|
||||||
int content2type(char *, char *);
|
|
||||||
int encoding2type(char *);
|
|
||||||
void mkfieldpanel(Rtext *);
|
void mkfieldpanel(Rtext *);
|
||||||
void geturl(char *, int, char *, int, int);
|
void geturl(char *, int, char *, int, int);
|
||||||
char version[];
|
char version[];
|
||||||
|
|
|
@ -609,8 +609,6 @@ void plrdhtml(char *name, int fd, Www *dst){
|
||||||
dst->title[0]='\0';
|
dst->title[0]='\0';
|
||||||
g.spacc=0;
|
g.spacc=0;
|
||||||
g.form=0;
|
g.form=0;
|
||||||
g.charset[0] = '\0';
|
|
||||||
strncpy(g.charset, dst->url->charset, sizeof(g.charset));
|
|
||||||
|
|
||||||
for(;;) switch(pl_gettoken(&g)){
|
for(;;) switch(pl_gettoken(&g)){
|
||||||
case TAG:
|
case TAG:
|
||||||
|
|
65
sys/src/cmd/mothra/snoop.c
Normal file
65
sys/src/cmd/mothra/snoop.c
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
#include <u.h>
|
||||||
|
#include <libc.h>
|
||||||
|
#include <draw.h>
|
||||||
|
#include <event.h>
|
||||||
|
#include <panel.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include "mothra.h"
|
||||||
|
|
||||||
|
int
|
||||||
|
snooptype(int fd)
|
||||||
|
{
|
||||||
|
int pfd[2], typ, n;
|
||||||
|
char buf[1024];
|
||||||
|
|
||||||
|
typ = PLAIN;
|
||||||
|
if((n = readn(fd, buf, sizeof(buf)-1)) < 0)
|
||||||
|
return typ;
|
||||||
|
buf[n] = 0;
|
||||||
|
if(cistrstr(buf, "<?xml") ||
|
||||||
|
cistrstr(buf, "<!DOCTYPE") ||
|
||||||
|
cistrstr(buf, "<HTML"))
|
||||||
|
typ = HTML;
|
||||||
|
else if(memcmp(buf, "\x1F\x8B", 2) == 0)
|
||||||
|
typ = GUNZIP;
|
||||||
|
else if(memcmp(buf, "\377\330\377", 3) == 0)
|
||||||
|
typ = JPEG;
|
||||||
|
else if(memcmp(buf, "\211PNG\r\n\032\n", 3) == 0)
|
||||||
|
typ = PNG;
|
||||||
|
else if(memcmp(buf, "GIF", 3) == 0)
|
||||||
|
typ = GIF;
|
||||||
|
else if(memcmp(buf, "BM", 2) == 0)
|
||||||
|
typ = BMP;
|
||||||
|
else if(memcmp(buf, "PK\x03\x04", 4) == 0)
|
||||||
|
typ = PAGE;
|
||||||
|
else if(memcmp(buf, "%PDF-", 5) == 0 || strstr(buf, "%!"))
|
||||||
|
typ = PAGE;
|
||||||
|
else if(memcmp(buf, "x T ", 4) == 0)
|
||||||
|
typ = PAGE;
|
||||||
|
else if(memcmp(buf, "\xF7\x02\x01\x83\x92\xC0\x1C;", 8) == 0)
|
||||||
|
typ = PAGE;
|
||||||
|
else if(memcmp(buf, "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", 8) == 0)
|
||||||
|
typ = PAGE;
|
||||||
|
else if(memcmp(buf, "\111\111\052\000", 4) == 0)
|
||||||
|
typ = PAGE;
|
||||||
|
else if(memcmp(buf, "\115\115\000\052", 4) == 0)
|
||||||
|
typ = PAGE;
|
||||||
|
if(pipe(pfd) >= 0){
|
||||||
|
switch(rfork(RFFDG|RFPROC|RFNOWAIT)){
|
||||||
|
case -1:
|
||||||
|
break;
|
||||||
|
case 0:
|
||||||
|
close(pfd[0]);
|
||||||
|
do {
|
||||||
|
if(write(pfd[1], buf, n) != n)
|
||||||
|
break;
|
||||||
|
} while((n = read(fd, buf, sizeof(buf))) > 0);
|
||||||
|
exits(nil);
|
||||||
|
default:
|
||||||
|
dup(pfd[0], fd);
|
||||||
|
}
|
||||||
|
close(pfd[1]);
|
||||||
|
close(pfd[0]);
|
||||||
|
}
|
||||||
|
return typ;
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue