mothra: ignore http content-type and encoding and just do content sniffing
This commit is contained in:
parent
001ce57253
commit
85d6170c35
8 changed files with 90 additions and 185 deletions
|
@ -1,106 +0,0 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <draw.h>
|
||||
#include <event.h>
|
||||
#include <panel.h>
|
||||
#include <ctype.h>
|
||||
#include "mothra.h"
|
||||
/*
 * One row of a lookup table: a string key and the type code it maps to.
 * The tables in this file end with a row whose name is 0; the kind stored
 * in that row is what the lookup routines return when nothing matches.
 */
typedef struct Kind {
	char	*name;	/* key, compared ignoring case; 0 marks the last row */
	int	kind;	/* type code associated with the key */
} Kind;
|
||||
/*
 * Look s up in table k, ignoring case.
 * Returns the kind of the first row whose name equals s.  When no row
 * matches, the scan stops on the terminating row (name 0) and that row's
 * kind is returned, so the terminator doubles as the table's default.
 */
int
klook(char *s, Kind *k)
{
	Kind *row;

	for(row = k; row->name != 0; row++)
		if(cistrcmp(row->name, s) == 0)
			break;
	return row->kind;
}
|
||||
Kind suffix[]={
|
||||
".html", HTML,
|
||||
".htm", HTML,
|
||||
"/", HTML,
|
||||
".gif", GIF,
|
||||
".jpe", JPEG,
|
||||
".jpg", JPEG,
|
||||
".jpeg", JPEG,
|
||||
".png", PNG,
|
||||
".pic", PIC,
|
||||
".au", AUDIO,
|
||||
".tif", TIFF,
|
||||
".tiff", TIFF,
|
||||
".xbm", XBM,
|
||||
".txt", PLAIN,
|
||||
".text", PLAIN,
|
||||
".ai", POSTSCRIPT,
|
||||
".eps", POSTSCRIPT,
|
||||
".ps", POSTSCRIPT,
|
||||
".pdf", PDF,
|
||||
".zip", ZIP,
|
||||
0, HTML
|
||||
};
|
||||
/*
 * Find the type implied by the ending of the name s.
 *
 * s   - the name to examine
 * len - number of bytes of s to consider
 * k   - table of endings, terminated by a row whose name is 0
 *
 * Returns the kind of the first row whose name equals, ignoring case,
 * the last strlen(row name) bytes of s[0..len).  If no row matches, the
 * kind stored in the terminating row is returned.
 *
 * The comparison is bounded by the length of the table entry, so only
 * bytes inside s[0..len) are examined and s does not have to be
 * NUL-terminated exactly at s[len].
 */
int
suflook(char *s, int len, Kind *k)
{
	int l;

	for(; k->name != 0; k++){
		l = strlen(k->name);
		if(l <= len && cistrncmp(k->name, s+len-l, l) == 0)
			return k->kind;
	}
	return k->kind;
}
|
||||
int suffix2type(char *name){
|
||||
int len, kind, restore;
|
||||
char *s;
|
||||
len=strlen(name);
|
||||
if(len>=2 && cistrcmp(name+len-2, ".Z")==0){
|
||||
kind=COMPRESS;
|
||||
len-=2;
|
||||
}
|
||||
else if(len>=3 && cistrcmp(name+len-3, ".gz")==0){
|
||||
kind=GUNZIP;
|
||||
len-=3;
|
||||
}
|
||||
else
|
||||
kind=0;
|
||||
restore=name[len];
|
||||
name[len]='\0';
|
||||
for(s=name+len;s!=name && *s!='.';--s);
|
||||
kind|=suflook(name, len, suffix);
|
||||
name[len]=restore;
|
||||
return kind;
|
||||
}
|
||||
Kind content[]={
|
||||
"text/html", HTML,
|
||||
"text/x-html", HTML,
|
||||
"application/html", HTML,
|
||||
"application/x-html", HTML,
|
||||
"text/plain", PLAIN,
|
||||
"image/gif", GIF,
|
||||
"image/jpeg", JPEG,
|
||||
"image/pjpeg", JPEG,
|
||||
"image/png", PNG,
|
||||
"image/tiff", TIFF,
|
||||
"image/x-xbitmap", XBM,
|
||||
"image/x-bitmap", XBM,
|
||||
"image/xbitmap", XBM,
|
||||
"application/postscript", POSTSCRIPT,
|
||||
"application/pdf", PDF,
|
||||
"application/octet-stream", SUFFIX,
|
||||
"application/zip", ZIP,
|
||||
0, SUFFIX
|
||||
};
|
||||
int content2type(char *s, char *name){
|
||||
int type;
|
||||
type=klook(s, content);
|
||||
if(type==SUFFIX) type=suffix2type(name);
|
||||
return type;
|
||||
}
|
||||
Kind encoding[]={
|
||||
"x-compress", COMPRESS,
|
||||
"compress", COMPRESS,
|
||||
"x-gzip", GUNZIP,
|
||||
"gzip", GUNZIP,
|
||||
0, 0
|
||||
};
|
||||
int encoding2type(char *s){
|
||||
return klook(s, encoding);
|
||||
}
|
|
@ -18,9 +18,7 @@ char *pixcmd[]={
|
|||
[GIF] "gif -9t",
|
||||
[JPEG] "jpg -9t",
|
||||
[PNG] "png -9t",
|
||||
[PIC] "fb/3to1 /lib/fb/cmap/rgbv",
|
||||
[TIFF] "/sys/lib/mothra/tiffcvt",
|
||||
[XBM] "fb/xbm2pic",
|
||||
[BMP] "bmp -9t",
|
||||
};
|
||||
|
||||
void storebitmap(Rtext *t, Image *b){
|
||||
|
@ -34,7 +32,7 @@ void getimage(Rtext *t, Www *w){
|
|||
Action *ap;
|
||||
Url url;
|
||||
Image *b;
|
||||
int fd;
|
||||
int fd, typ;
|
||||
char err[512];
|
||||
Pix *p;
|
||||
|
||||
|
@ -56,17 +54,12 @@ void getimage(Rtext *t, Www *w){
|
|||
close(fd);
|
||||
return;
|
||||
}
|
||||
if(url.type!=GIF
|
||||
&& url.type!=JPEG
|
||||
&& url.type!=PNG
|
||||
&& url.type!=PIC
|
||||
&& url.type!=TIFF
|
||||
&& url.type!=XBM){
|
||||
typ = snooptype(fd);
|
||||
if(typ < 0 || typ >= nelem(pixcmd) || pixcmd[typ] == nil){
|
||||
werrstr("unknown image type");
|
||||
goto Err;
|
||||
}
|
||||
|
||||
if((fd = pipeline(pixcmd[url.type], fd)) < 0)
|
||||
if((fd = pipeline(pixcmd[typ], fd)) < 0)
|
||||
goto Err;
|
||||
if(ap->width>0 || ap->height>0){
|
||||
char buf[80];
|
||||
|
|
|
@ -70,7 +70,6 @@ struct Hglob{
|
|||
char *etext; /* end of text buffer */
|
||||
Form *form; /* data for form under construction */
|
||||
Www *dst; /* where the text goes */
|
||||
char charset[NNAME];
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
TARG=mothra
|
||||
LIB=libpanel/libpanel.$O.a
|
||||
CFILES= \
|
||||
filetype.c \
|
||||
snoop.c \
|
||||
forms.c \
|
||||
getpix.c \
|
||||
html.syntax.c \
|
||||
|
|
|
@ -30,7 +30,6 @@ Url defurl={
|
|||
"http://cat-v.org/",
|
||||
"",
|
||||
"",
|
||||
HTML,
|
||||
};
|
||||
Url badurl={
|
||||
"",
|
||||
|
@ -38,7 +37,6 @@ Url badurl={
|
|||
"No file loaded",
|
||||
"",
|
||||
"",
|
||||
HTML,
|
||||
};
|
||||
Cursor patientcurs={
|
||||
0, 0,
|
||||
|
@ -816,7 +814,6 @@ int fileurlopen(Url *url){
|
|||
memset(url->fullname, 0, sizeof(url->fullname));
|
||||
strcpy(url->fullname, "file:");
|
||||
fd2path(fd, url->fullname+5, sizeof(url->fullname)-6);
|
||||
url->type = content2type("application/octet-stream", url->fullname);
|
||||
return fd;
|
||||
}
|
||||
|
||||
|
@ -870,20 +867,6 @@ int urlopen(Url *url, int method, char *body){
|
|||
snprint(buf, sizeof buf, "%s/%d/parsed", mtpt, conn);
|
||||
readstr(url->fullname, sizeof(url->fullname), buf, "url");
|
||||
readstr(url->tag, sizeof(url->tag), buf, "fragment");
|
||||
|
||||
snprint(buf, sizeof buf, "%s/%d", mtpt, conn);
|
||||
readstr(buf, sizeof buf, buf, "contenttype");
|
||||
url->charset[0] = 0;
|
||||
if(p = cistrstr(buf, "charset=")){
|
||||
p += 8;
|
||||
strncpy(url->charset, p, sizeof(url->charset));
|
||||
if(p = strchr(url->charset, ';'))
|
||||
*p = 0;
|
||||
}
|
||||
if(p = strchr(buf, ';'))
|
||||
*p = 0;
|
||||
url->type = content2type(buf, url->fullname);
|
||||
|
||||
close(ctlfd);
|
||||
return fd;
|
||||
}
|
||||
|
@ -931,9 +914,7 @@ void seturl(Url *url, char *urlname, char *base){
|
|||
strncpy(url->reltext, urlname, sizeof(url->reltext));
|
||||
strncpy(url->basename, base, sizeof(url->basename));
|
||||
url->fullname[0] = 0;
|
||||
url->charset[0] = 0;
|
||||
url->tag[0] = 0;
|
||||
url->type = 0;
|
||||
url->map = 0;
|
||||
}
|
||||
Url *copyurl(Url *u){
|
||||
|
@ -951,7 +932,7 @@ void freeurl(Url *u){
|
|||
* get the file at the given url
|
||||
*/
|
||||
void geturl(char *urlname, int method, char *body, int cache, int map){
|
||||
int i, fd;
|
||||
int i, fd, typ;
|
||||
char cmd[NNAME];
|
||||
int pfd[2];
|
||||
Www *w;
|
||||
|
@ -968,18 +949,17 @@ void geturl(char *urlname, int method, char *body, int cache, int map){
|
|||
break;
|
||||
}
|
||||
message("getting %s", selection->fullname);
|
||||
if(selection->type&COMPRESS)
|
||||
fd=pipeline("/bin/uncompress", fd);
|
||||
else if(selection->type&GUNZIP)
|
||||
typ = snooptype(fd);
|
||||
if(typ == GUNZIP){
|
||||
fd=pipeline("/bin/gunzip", fd);
|
||||
switch(selection->type&~COMPRESSION){
|
||||
typ = snooptype(fd);
|
||||
}
|
||||
switch(typ){
|
||||
default:
|
||||
message("Bad type %x in geturl", selection->type);
|
||||
message("Bad type %x in geturl", typ);
|
||||
break;
|
||||
case HTML:
|
||||
snprint(cmd, sizeof(cmd), selection->charset[0] ?
|
||||
"/bin/uhtml -c %s" : "/bin/uhtml", selection->charset);
|
||||
fd = pipeline(cmd, fd);
|
||||
fd = pipeline("/bin/uhtml", fd);
|
||||
case PLAIN:
|
||||
w = www(i = wwwtop++);
|
||||
if(i >= NWWW){
|
||||
|
@ -1005,24 +985,18 @@ void geturl(char *urlname, int method, char *body, int cache, int map){
|
|||
w->url=copyurl(selection);
|
||||
w->finished = 0;
|
||||
w->alldone = 0;
|
||||
gettext(w, fd, selection->type&~COMPRESSION);
|
||||
gettext(w, fd, typ);
|
||||
plinitlist(list, PACKN|FILLX, genwww, 8, doprev);
|
||||
if(defdisplay) pldraw(list, screen);
|
||||
setcurrent(i, selection->tag);
|
||||
break;
|
||||
case POSTSCRIPT:
|
||||
case GIF:
|
||||
case JPEG:
|
||||
case PNG:
|
||||
case PDF:
|
||||
case BMP:
|
||||
case PAGE:
|
||||
filter("page -w", fd);
|
||||
break;
|
||||
case TIFF:
|
||||
filter("/sys/lib/mothra/tiffview", fd);
|
||||
break;
|
||||
case XBM:
|
||||
filter("fb/xbm2pic|fb/9v", fd);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -27,9 +27,7 @@ struct Url{
|
|||
char basename[NNAME];
|
||||
char reltext[NNAME];
|
||||
char tag[NNAME];
|
||||
char charset[NNAME];
|
||||
int type;
|
||||
int map; /* is this an image map? */
|
||||
int map; /* is this an image map? */
|
||||
};
|
||||
struct Www{
|
||||
Url *url;
|
||||
|
@ -43,29 +41,15 @@ struct Www{
|
|||
int alldone; /* page will not change further -- used to adjust cursor */
|
||||
};
|
||||
|
||||
/*
|
||||
* url reference types -- COMPRESS and GUNZIP are flags that can modify any other type
|
||||
* Changing these in a non-downward compatible way spoils cache entries
|
||||
*/
|
||||
enum{
|
||||
GIF=1,
|
||||
HTML,
|
||||
JPEG,
|
||||
PIC,
|
||||
TIFF,
|
||||
AUDIO,
|
||||
PLAIN,
|
||||
XBM,
|
||||
POSTSCRIPT,
|
||||
FORWARD,
|
||||
PDF,
|
||||
SUFFIX,
|
||||
ZIP,
|
||||
HTML,
|
||||
GIF,
|
||||
JPEG,
|
||||
PNG,
|
||||
|
||||
COMPRESS=16,
|
||||
GUNZIP=32,
|
||||
COMPRESSION=16+32,
|
||||
BMP,
|
||||
GUNZIP,
|
||||
PAGE,
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -102,9 +86,7 @@ void *emalloc(int);
|
|||
void *emallocz(int, int);
|
||||
void setbitmap(Rtext *);
|
||||
void message(char *, ...);
|
||||
int suffix2type(char *);
|
||||
int content2type(char *, char *);
|
||||
int encoding2type(char *);
|
||||
int snooptype(int fd);
|
||||
void mkfieldpanel(Rtext *);
|
||||
void geturl(char *, int, char *, int, int);
|
||||
char version[];
|
||||
|
|
|
@ -609,8 +609,6 @@ void plrdhtml(char *name, int fd, Www *dst){
|
|||
dst->title[0]='\0';
|
||||
g.spacc=0;
|
||||
g.form=0;
|
||||
g.charset[0] = '\0';
|
||||
strncpy(g.charset, dst->url->charset, sizeof(g.charset));
|
||||
|
||||
for(;;) switch(pl_gettoken(&g)){
|
||||
case TAG:
|
||||
|
|
65
sys/src/cmd/mothra/snoop.c
Normal file
65
sys/src/cmd/mothra/snoop.c
Normal file
|
@ -0,0 +1,65 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <draw.h>
|
||||
#include <event.h>
|
||||
#include <panel.h>
|
||||
#include <ctype.h>
|
||||
#include "mothra.h"
|
||||
|
||||
int
|
||||
snooptype(int fd)
|
||||
{
|
||||
int pfd[2], typ, n;
|
||||
char buf[1024];
|
||||
|
||||
typ = PLAIN;
|
||||
if((n = readn(fd, buf, sizeof(buf)-1)) < 0)
|
||||
return typ;
|
||||
buf[n] = 0;
|
||||
if(cistrstr(buf, "<?xml") ||
|
||||
cistrstr(buf, "<!DOCTYPE") ||
|
||||
cistrstr(buf, "<HTML"))
|
||||
typ = HTML;
|
||||
else if(memcmp(buf, "\x1F\x8B", 2) == 0)
|
||||
typ = GUNZIP;
|
||||
else if(memcmp(buf, "\377\330\377", 3) == 0)
|
||||
typ = JPEG;
|
||||
else if(memcmp(buf, "\211PNG\r\n\032\n", 3) == 0)
|
||||
typ = PNG;
|
||||
else if(memcmp(buf, "GIF", 3) == 0)
|
||||
typ = GIF;
|
||||
else if(memcmp(buf, "BM", 2) == 0)
|
||||
typ = BMP;
|
||||
else if(memcmp(buf, "PK\x03\x04", 4) == 0)
|
||||
typ = PAGE;
|
||||
else if(memcmp(buf, "%PDF-", 5) == 0 || strstr(buf, "%!"))
|
||||
typ = PAGE;
|
||||
else if(memcmp(buf, "x T ", 4) == 0)
|
||||
typ = PAGE;
|
||||
else if(memcmp(buf, "\xF7\x02\x01\x83\x92\xC0\x1C;", 8) == 0)
|
||||
typ = PAGE;
|
||||
else if(memcmp(buf, "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", 8) == 0)
|
||||
typ = PAGE;
|
||||
else if(memcmp(buf, "\111\111\052\000", 4) == 0)
|
||||
typ = PAGE;
|
||||
else if(memcmp(buf, "\115\115\000\052", 4) == 0)
|
||||
typ = PAGE;
|
||||
if(pipe(pfd) >= 0){
|
||||
switch(rfork(RFFDG|RFPROC|RFNOWAIT)){
|
||||
case -1:
|
||||
break;
|
||||
case 0:
|
||||
close(pfd[0]);
|
||||
do {
|
||||
if(write(pfd[1], buf, n) != n)
|
||||
break;
|
||||
} while((n = read(fd, buf, sizeof(buf))) > 0);
|
||||
exits(nil);
|
||||
default:
|
||||
dup(pfd[0], fd);
|
||||
}
|
||||
close(pfd[1]);
|
||||
close(pfd[0]);
|
||||
}
|
||||
return typ;
|
||||
}
|
Loading…
Reference in a new issue