mothra: ignore http content-type and encoding and just do content sniffing

This commit is contained in:
cinap_lenrek 2011-10-03 20:40:43 +02:00
parent 001ce57253
commit 85d6170c35
8 changed files with 90 additions and 185 deletions

View file

@ -1,106 +0,0 @@
#include <u.h>
#include <libc.h>
#include <draw.h>
#include <event.h>
#include <panel.h>
#include <ctype.h>
#include "mothra.h"
/*
 * One row of a name-to-kind lookup table.
 * Tables are arrays of Kind terminated by a row whose name is 0;
 * the kind stored in that terminating row is what the lookup
 * functions return when no name matches.
 */
typedef struct Kind Kind;
struct Kind{
char *name;	/* string to match, compared ignoring case; 0 ends the table */
int kind;	/* value returned for a match */
};
/*
 * Look up s in table k, ignoring case.
 * Returns the kind of the first row whose name equals s.  When no row
 * matches, returns the kind held by the terminating row (name == 0).
 */
int klook(char *s, Kind *k){
	Kind *row;

	for(row=k; row->name!=0; row++)
		if(cistrcmp(row->name, s)==0)
			break;
	return row->kind;
}
Kind suffix[]={
".html", HTML,
".htm", HTML,
"/", HTML,
".gif", GIF,
".jpe", JPEG,
".jpg", JPEG,
".jpeg", JPEG,
".png", PNG,
".pic", PIC,
".au", AUDIO,
".tif", TIFF,
".tiff", TIFF,
".xbm", XBM,
".txt", PLAIN,
".text", PLAIN,
".ai", POSTSCRIPT,
".eps", POSTSCRIPT,
".ps", POSTSCRIPT,
".pdf", PDF,
".zip", ZIP,
0, HTML
};
/*
 * Find the first row of table k whose name matches, ignoring case,
 * the text of s starting at s+len-strlen(name).  Rows whose name is
 * longer than len are skipped.
 * Returns that row's kind, or the terminating row's kind when no row
 * matches.  The comparison runs to the end of the string s, not just
 * to s+len.
 */
int suflook(char *s, int len, Kind *k){
	int n;
	char *end;

	end=s+len;
	for(; k->name; k++){
		n=strlen(k->name);
		if(n>len)
			continue;
		if(cistrcmp(k->name, end-n)==0)
			return k->kind;
	}
	return k->kind;
}
/*
 * Derive a type from the ending of name.
 * A trailing ".Z" or ".gz" (compared ignoring case) sets COMPRESS or
 * GUNZIP respectively and is excluded from the remaining match; the
 * rest of the name is then looked up in the suffix table and the
 * result is or'ed with the compression flag.
 * name is modified in place while the lookup runs and restored before
 * returning, so it must be writable.
 */
int suffix2type(char *name){
	int len, kind, restore;

	len=strlen(name);
	kind=0;
	if(len>=2 && cistrcmp(name+len-2, ".Z")==0){
		kind=COMPRESS;
		len-=2;
	}else if(len>=3 && cistrcmp(name+len-3, ".gz")==0){
		kind=GUNZIP;
		len-=3;
	}
	/*
	 * suflook compares up to the terminating NUL, so cut the
	 * string at len to hide any compression suffix from it.
	 */
	restore=name[len];
	name[len]='\0';
	kind|=suflook(name, len, suffix);
	name[len]=restore;
	return kind;
}
Kind content[]={
"text/html", HTML,
"text/x-html", HTML,
"application/html", HTML,
"application/x-html", HTML,
"text/plain", PLAIN,
"image/gif", GIF,
"image/jpeg", JPEG,
"image/pjpeg", JPEG,
"image/png", PNG,
"image/tiff", TIFF,
"image/x-xbitmap", XBM,
"image/x-bitmap", XBM,
"image/xbitmap", XBM,
"application/postscript", POSTSCRIPT,
"application/pdf", PDF,
"application/octet-stream", SUFFIX,
"application/zip", ZIP,
0, SUFFIX
};
/*
 * Map content type string s to a type using the content table.
 * When the table yields SUFFIX (an entry that says so, or no match),
 * the type is taken from the ending of name instead.
 */
int content2type(char *s, char *name){
	int type;

	type=klook(s, content);
	return type==SUFFIX ? suffix2type(name) : type;
}
Kind encoding[]={
"x-compress", COMPRESS,
"compress", COMPRESS,
"x-gzip", GUNZIP,
"gzip", GUNZIP,
0, 0
};
/*
 * Map encoding name s to COMPRESS or GUNZIP using the encoding table;
 * returns 0 when s is not listed.
 */
int encoding2type(char *s){
	int type;

	type=klook(s, encoding);
	return type;
}

View file

@ -18,9 +18,7 @@ char *pixcmd[]={
[GIF] "gif -9t", [GIF] "gif -9t",
[JPEG] "jpg -9t", [JPEG] "jpg -9t",
[PNG] "png -9t", [PNG] "png -9t",
[PIC] "fb/3to1 /lib/fb/cmap/rgbv", [BMP] "bmp -9t",
[TIFF] "/sys/lib/mothra/tiffcvt",
[XBM] "fb/xbm2pic",
}; };
void storebitmap(Rtext *t, Image *b){ void storebitmap(Rtext *t, Image *b){
@ -34,7 +32,7 @@ void getimage(Rtext *t, Www *w){
Action *ap; Action *ap;
Url url; Url url;
Image *b; Image *b;
int fd; int fd, typ;
char err[512]; char err[512];
Pix *p; Pix *p;
@ -56,17 +54,12 @@ void getimage(Rtext *t, Www *w){
close(fd); close(fd);
return; return;
} }
if(url.type!=GIF typ = snooptype(fd);
&& url.type!=JPEG if(typ < 0 || typ >= nelem(pixcmd) || pixcmd[typ] == nil){
&& url.type!=PNG
&& url.type!=PIC
&& url.type!=TIFF
&& url.type!=XBM){
werrstr("unknown image type"); werrstr("unknown image type");
goto Err; goto Err;
} }
if((fd = pipeline(pixcmd[typ], fd)) < 0)
if((fd = pipeline(pixcmd[url.type], fd)) < 0)
goto Err; goto Err;
if(ap->width>0 || ap->height>0){ if(ap->width>0 || ap->height>0){
char buf[80]; char buf[80];

View file

@ -70,7 +70,6 @@ struct Hglob{
char *etext; /* end of text buffer */ char *etext; /* end of text buffer */
Form *form; /* data for form under construction */ Form *form; /* data for form under construction */
Www *dst; /* where the text goes */ Www *dst; /* where the text goes */
char charset[NNAME];
}; };
/* /*

View file

@ -3,7 +3,7 @@
TARG=mothra TARG=mothra
LIB=libpanel/libpanel.$O.a LIB=libpanel/libpanel.$O.a
CFILES= \ CFILES= \
filetype.c \ snoop.c \
forms.c \ forms.c \
getpix.c \ getpix.c \
html.syntax.c \ html.syntax.c \

View file

@ -30,7 +30,6 @@ Url defurl={
"http://cat-v.org/", "http://cat-v.org/",
"", "",
"", "",
HTML,
}; };
Url badurl={ Url badurl={
"", "",
@ -38,7 +37,6 @@ Url badurl={
"No file loaded", "No file loaded",
"", "",
"", "",
HTML,
}; };
Cursor patientcurs={ Cursor patientcurs={
0, 0, 0, 0,
@ -816,7 +814,6 @@ int fileurlopen(Url *url){
memset(url->fullname, 0, sizeof(url->fullname)); memset(url->fullname, 0, sizeof(url->fullname));
strcpy(url->fullname, "file:"); strcpy(url->fullname, "file:");
fd2path(fd, url->fullname+5, sizeof(url->fullname)-6); fd2path(fd, url->fullname+5, sizeof(url->fullname)-6);
url->type = content2type("application/octet-stream", url->fullname);
return fd; return fd;
} }
@ -870,20 +867,6 @@ int urlopen(Url *url, int method, char *body){
snprint(buf, sizeof buf, "%s/%d/parsed", mtpt, conn); snprint(buf, sizeof buf, "%s/%d/parsed", mtpt, conn);
readstr(url->fullname, sizeof(url->fullname), buf, "url"); readstr(url->fullname, sizeof(url->fullname), buf, "url");
readstr(url->tag, sizeof(url->tag), buf, "fragment"); readstr(url->tag, sizeof(url->tag), buf, "fragment");
snprint(buf, sizeof buf, "%s/%d", mtpt, conn);
readstr(buf, sizeof buf, buf, "contenttype");
url->charset[0] = 0;
if(p = cistrstr(buf, "charset=")){
p += 8;
strncpy(url->charset, p, sizeof(url->charset));
if(p = strchr(url->charset, ';'))
*p = 0;
}
if(p = strchr(buf, ';'))
*p = 0;
url->type = content2type(buf, url->fullname);
close(ctlfd); close(ctlfd);
return fd; return fd;
} }
@ -931,9 +914,7 @@ void seturl(Url *url, char *urlname, char *base){
strncpy(url->reltext, urlname, sizeof(url->reltext)); strncpy(url->reltext, urlname, sizeof(url->reltext));
strncpy(url->basename, base, sizeof(url->basename)); strncpy(url->basename, base, sizeof(url->basename));
url->fullname[0] = 0; url->fullname[0] = 0;
url->charset[0] = 0;
url->tag[0] = 0; url->tag[0] = 0;
url->type = 0;
url->map = 0; url->map = 0;
} }
Url *copyurl(Url *u){ Url *copyurl(Url *u){
@ -951,7 +932,7 @@ void freeurl(Url *u){
* get the file at the given url * get the file at the given url
*/ */
void geturl(char *urlname, int method, char *body, int cache, int map){ void geturl(char *urlname, int method, char *body, int cache, int map){
int i, fd; int i, fd, typ;
char cmd[NNAME]; char cmd[NNAME];
int pfd[2]; int pfd[2];
Www *w; Www *w;
@ -968,18 +949,17 @@ void geturl(char *urlname, int method, char *body, int cache, int map){
break; break;
} }
message("getting %s", selection->fullname); message("getting %s", selection->fullname);
if(selection->type&COMPRESS) typ = snooptype(fd);
fd=pipeline("/bin/uncompress", fd); if(typ == GUNZIP){
else if(selection->type&GUNZIP)
fd=pipeline("/bin/gunzip", fd); fd=pipeline("/bin/gunzip", fd);
switch(selection->type&~COMPRESSION){ typ = snooptype(fd);
}
switch(typ){
default: default:
message("Bad type %x in geturl", selection->type); message("Bad type %x in geturl", typ);
break; break;
case HTML: case HTML:
snprint(cmd, sizeof(cmd), selection->charset[0] ? fd = pipeline("/bin/uhtml", fd);
"/bin/uhtml -c %s" : "/bin/uhtml", selection->charset);
fd = pipeline(cmd, fd);
case PLAIN: case PLAIN:
w = www(i = wwwtop++); w = www(i = wwwtop++);
if(i >= NWWW){ if(i >= NWWW){
@ -1005,24 +985,18 @@ void geturl(char *urlname, int method, char *body, int cache, int map){
w->url=copyurl(selection); w->url=copyurl(selection);
w->finished = 0; w->finished = 0;
w->alldone = 0; w->alldone = 0;
gettext(w, fd, selection->type&~COMPRESSION); gettext(w, fd, typ);
plinitlist(list, PACKN|FILLX, genwww, 8, doprev); plinitlist(list, PACKN|FILLX, genwww, 8, doprev);
if(defdisplay) pldraw(list, screen); if(defdisplay) pldraw(list, screen);
setcurrent(i, selection->tag); setcurrent(i, selection->tag);
break; break;
case POSTSCRIPT:
case GIF: case GIF:
case JPEG: case JPEG:
case PNG: case PNG:
case PDF: case BMP:
case PAGE:
filter("page -w", fd); filter("page -w", fd);
break; break;
case TIFF:
filter("/sys/lib/mothra/tiffview", fd);
break;
case XBM:
filter("fb/xbm2pic|fb/9v", fd);
break;
} }
break; break;
} }

View file

@ -27,8 +27,6 @@ struct Url{
char basename[NNAME]; char basename[NNAME];
char reltext[NNAME]; char reltext[NNAME];
char tag[NNAME]; char tag[NNAME];
char charset[NNAME];
int type;
int map; /* is this an image map? */ int map; /* is this an image map? */
}; };
struct Www{ struct Www{
@ -43,29 +41,15 @@ struct Www{
int alldone; /* page will not change further -- used to adjust cursor */ int alldone; /* page will not change further -- used to adjust cursor */
}; };
/*
* url reference types -- COMPRESS and GUNZIP are flags that can modify any other type
* Changing these in a non-downward compatible way spoils cache entries
*/
enum{ enum{
GIF=1,
HTML,
JPEG,
PIC,
TIFF,
AUDIO,
PLAIN, PLAIN,
XBM, HTML,
POSTSCRIPT, GIF,
FORWARD, JPEG,
PDF,
SUFFIX,
ZIP,
PNG, PNG,
BMP,
COMPRESS=16, GUNZIP,
GUNZIP=32, PAGE,
COMPRESSION=16+32,
}; };
/* /*
@ -102,9 +86,7 @@ void *emalloc(int);
void *emallocz(int, int); void *emallocz(int, int);
void setbitmap(Rtext *); void setbitmap(Rtext *);
void message(char *, ...); void message(char *, ...);
int suffix2type(char *); int snooptype(int fd);
int content2type(char *, char *);
int encoding2type(char *);
void mkfieldpanel(Rtext *); void mkfieldpanel(Rtext *);
void geturl(char *, int, char *, int, int); void geturl(char *, int, char *, int, int);
char version[]; char version[];

View file

@ -609,8 +609,6 @@ void plrdhtml(char *name, int fd, Www *dst){
dst->title[0]='\0'; dst->title[0]='\0';
g.spacc=0; g.spacc=0;
g.form=0; g.form=0;
g.charset[0] = '\0';
strncpy(g.charset, dst->url->charset, sizeof(g.charset));
for(;;) switch(pl_gettoken(&g)){ for(;;) switch(pl_gettoken(&g)){
case TAG: case TAG:

View file

@ -0,0 +1,65 @@
#include <u.h>
#include <libc.h>
#include <draw.h>
#include <event.h>
#include <panel.h>
#include <ctype.h>
#include "mothra.h"
/*
 * Report whether the n bytes read into buf begin with the len-byte
 * signature magic.  Bytes past n were never read, so a signature
 * longer than the data cannot match.
 */
static int
hasmagic(char *buf, int n, char *magic, int len)
{
	return n >= len && memcmp(buf, magic, len) == 0;
}

/*
 * Guess the type of the data on fd by looking at its first bytes.
 *
 * Up to sizeof(buf)-1 bytes are read from fd and checked against a
 * list of markers; the first one that matches decides the result,
 * and PLAIN is returned when none does or when the read fails.
 *
 * Because the sniffed bytes have been consumed, a child process is
 * started that writes them, followed by the rest of fd, into a pipe;
 * the read end of the pipe is then dup'ed over fd so the caller reads
 * the stream from its beginning again.  If pipe or rfork fails, fd is
 * left as it is, with the sniffed bytes already consumed.
 */
int
snooptype(int fd)
{
	int pfd[2], typ, n;
	char buf[1024];

	typ = PLAIN;
	if((n = readn(fd, buf, sizeof(buf)-1)) < 0)
		return typ;
	buf[n] = 0;	/* terminate for the string searches below */
	if(cistrstr(buf, "<?xml") ||
		cistrstr(buf, "<!DOCTYPE") ||
		cistrstr(buf, "<HTML"))
		typ = HTML;
	else if(hasmagic(buf, n, "\x1F\x8B", 2))
		typ = GUNZIP;
	else if(hasmagic(buf, n, "\377\330\377", 3))
		typ = JPEG;
	else if(hasmagic(buf, n, "\211PNG\r\n\032\n", 8))	/* full 8-byte signature */
		typ = PNG;
	else if(hasmagic(buf, n, "GIF", 3))
		typ = GIF;
	else if(hasmagic(buf, n, "BM", 2))
		typ = BMP;
	/* the remaining signatures are all reported as PAGE */
	else if(hasmagic(buf, n, "PK\x03\x04", 4))
		typ = PAGE;
	else if(hasmagic(buf, n, "%PDF-", 5) || strstr(buf, "%!"))
		typ = PAGE;
	else if(hasmagic(buf, n, "x T ", 4))
		typ = PAGE;
	else if(hasmagic(buf, n, "\xF7\x02\x01\x83\x92\xC0\x1C;", 8))
		typ = PAGE;
	else if(hasmagic(buf, n, "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", 8))
		typ = PAGE;
	else if(hasmagic(buf, n, "\111\111\052\000", 4))
		typ = PAGE;
	else if(hasmagic(buf, n, "\115\115\000\052", 4))
		typ = PAGE;
	if(pipe(pfd) >= 0){
		switch(rfork(RFFDG|RFPROC|RFNOWAIT)){
		case -1:
			break;
		case 0:
			/* child: replay the sniffed bytes, then copy the rest of fd */
			close(pfd[0]);
			do {
				if(write(pfd[1], buf, n) != n)
					break;
			} while((n = read(fd, buf, sizeof(buf))) > 0);
			exits(nil);
		default:
			/* parent: fd now refers to the read end of the pipe */
			dup(pfd[0], fd);
		}
		close(pfd[1]);
		close(pfd[0]);
	}
	return typ;
}