mothra: ignore http content-type and encoding and just do content sniffing

This commit is contained in:
cinap_lenrek 2011-10-03 20:40:43 +02:00
parent 001ce57253
commit 85d6170c35
8 changed files with 90 additions and 185 deletions

View file

@ -1,106 +0,0 @@
#include <u.h>
#include <libc.h>
#include <draw.h>
#include <event.h>
#include <panel.h>
#include <ctype.h>
#include "mothra.h"
typedef struct Kind Kind;
/*
 * One row of a lookup table: a key string and the type code it selects.
 * The tables in this file end with a row whose name is 0; the lookup
 * functions return that row's kind when no earlier row matches.
 */
struct Kind{
char *name;	/* key to match, or 0 in the terminating row */
int kind;	/* type code associated with name */
};
/*
 * Find the row of table k whose name equals s, ignoring case,
 * and return its kind.  The table must end with a row whose
 * name is 0; that row's kind is returned when nothing matches.
 */
int klook(char *s, Kind *k){
	Kind *row;

	for(row=k; row->name!=0; row++)
		if(cistrcmp(row->name, s)==0)
			break;
	return row->kind;
}
Kind suffix[]={
".html", HTML,
".htm", HTML,
"/", HTML,
".gif", GIF,
".jpe", JPEG,
".jpg", JPEG,
".jpeg", JPEG,
".png", PNG,
".pic", PIC,
".au", AUDIO,
".tif", TIFF,
".tiff", TIFF,
".xbm", XBM,
".txt", PLAIN,
".text", PLAIN,
".ai", POSTSCRIPT,
".eps", POSTSCRIPT,
".ps", POSTSCRIPT,
".pdf", PDF,
".zip", ZIP,
0, HTML
};
/*
 * Return the kind of the first row in k whose name matches the
 * last strlen(name) characters of s, ignoring case.  len is the
 * length of s to consider; the comparison runs from s+len-strlen(name)
 * to the terminating NUL, so the caller is expected to have s end at
 * s[len].  If no row matches, the kind of the terminating row
 * (name 0) is returned.
 */
int suflook(char *s, int len, Kind *k){
	int n;

	for(; k->name; k++){
		n=strlen(k->name);
		if(n>len)
			continue;	/* suffix longer than the name itself */
		if(cistrcmp(k->name, s+len-n)==0)
			return k->kind;
	}
	return k->kind;
}
/*
 * Derive a type code from the suffix of file name `name'.
 *
 * A trailing ".Z" or ".gz" (case-insensitive) sets the COMPRESS or
 * GUNZIP flag and is stripped before the remaining suffix is looked
 * up in the suffix table; the flag is or'ed into the result.
 *
 * name is modified temporarily (a NUL is planted where the
 * compression suffix starts, then restored), so it must point to
 * writable storage.
 */
int suffix2type(char *name){
	int len, kind, restore;

	len=strlen(name);
	kind=0;
	if(len>=2 && cistrcmp(name+len-2, ".Z")==0){
		kind=COMPRESS;
		len-=2;
	}else if(len>=3 && cistrcmp(name+len-3, ".gz")==0){
		kind=GUNZIP;
		len-=3;
	}
	/* suflook compares up to the NUL, so cut the name at len for the lookup */
	restore=name[len];
	name[len]='\0';
	kind|=suflook(name, len, suffix);
	name[len]=restore;
	return kind;
}
Kind content[]={
"text/html", HTML,
"text/x-html", HTML,
"application/html", HTML,
"application/x-html", HTML,
"text/plain", PLAIN,
"image/gif", GIF,
"image/jpeg", JPEG,
"image/pjpeg", JPEG,
"image/png", PNG,
"image/tiff", TIFF,
"image/x-xbitmap", XBM,
"image/x-bitmap", XBM,
"image/xbitmap", XBM,
"application/postscript", POSTSCRIPT,
"application/pdf", PDF,
"application/octet-stream", SUFFIX,
"application/zip", ZIP,
0, SUFFIX
};
/*
 * Map content type string s to a type code using the content table.
 * When the table yields SUFFIX (listed as such, or not listed at all),
 * the type is taken from the suffix of file name `name' instead.
 */
int content2type(char *s, char *name){
	int type;

	type=klook(s, content);
	if(type!=SUFFIX)
		return type;
	return suffix2type(name);
}
Kind encoding[]={
"x-compress", COMPRESS,
"compress", COMPRESS,
"x-gzip", GUNZIP,
"gzip", GUNZIP,
0, 0
};
/*
 * Map content encoding string s to COMPRESS or GUNZIP using the
 * encoding table; returns 0 for an encoding that is not listed.
 */
int encoding2type(char *s){
	int flag;

	flag=klook(s, encoding);
	return flag;
}

View file

@ -18,9 +18,7 @@ char *pixcmd[]={
[GIF] "gif -9t",
[JPEG] "jpg -9t",
[PNG] "png -9t",
[PIC] "fb/3to1 /lib/fb/cmap/rgbv",
[TIFF] "/sys/lib/mothra/tiffcvt",
[XBM] "fb/xbm2pic",
[BMP] "bmp -9t",
};
void storebitmap(Rtext *t, Image *b){
@ -34,7 +32,7 @@ void getimage(Rtext *t, Www *w){
Action *ap;
Url url;
Image *b;
int fd;
int fd, typ;
char err[512];
Pix *p;
@ -56,17 +54,12 @@ void getimage(Rtext *t, Www *w){
close(fd);
return;
}
if(url.type!=GIF
&& url.type!=JPEG
&& url.type!=PNG
&& url.type!=PIC
&& url.type!=TIFF
&& url.type!=XBM){
typ = snooptype(fd);
if(typ < 0 || typ >= nelem(pixcmd) || pixcmd[typ] == nil){
werrstr("unknown image type");
goto Err;
}
if((fd = pipeline(pixcmd[url.type], fd)) < 0)
if((fd = pipeline(pixcmd[typ], fd)) < 0)
goto Err;
if(ap->width>0 || ap->height>0){
char buf[80];

View file

@ -70,7 +70,6 @@ struct Hglob{
char *etext; /* end of text buffer */
Form *form; /* data for form under construction */
Www *dst; /* where the text goes */
char charset[NNAME];
};
/*

View file

@ -3,7 +3,7 @@
TARG=mothra
LIB=libpanel/libpanel.$O.a
CFILES= \
filetype.c \
snoop.c \
forms.c \
getpix.c \
html.syntax.c \

View file

@ -30,7 +30,6 @@ Url defurl={
"http://cat-v.org/",
"",
"",
HTML,
};
Url badurl={
"",
@ -38,7 +37,6 @@ Url badurl={
"No file loaded",
"",
"",
HTML,
};
Cursor patientcurs={
0, 0,
@ -816,7 +814,6 @@ int fileurlopen(Url *url){
memset(url->fullname, 0, sizeof(url->fullname));
strcpy(url->fullname, "file:");
fd2path(fd, url->fullname+5, sizeof(url->fullname)-6);
url->type = content2type("application/octet-stream", url->fullname);
return fd;
}
@ -870,20 +867,6 @@ int urlopen(Url *url, int method, char *body){
snprint(buf, sizeof buf, "%s/%d/parsed", mtpt, conn);
readstr(url->fullname, sizeof(url->fullname), buf, "url");
readstr(url->tag, sizeof(url->tag), buf, "fragment");
snprint(buf, sizeof buf, "%s/%d", mtpt, conn);
readstr(buf, sizeof buf, buf, "contenttype");
url->charset[0] = 0;
if(p = cistrstr(buf, "charset=")){
p += 8;
strncpy(url->charset, p, sizeof(url->charset));
if(p = strchr(url->charset, ';'))
*p = 0;
}
if(p = strchr(buf, ';'))
*p = 0;
url->type = content2type(buf, url->fullname);
close(ctlfd);
return fd;
}
@ -931,9 +914,7 @@ void seturl(Url *url, char *urlname, char *base){
strncpy(url->reltext, urlname, sizeof(url->reltext));
strncpy(url->basename, base, sizeof(url->basename));
url->fullname[0] = 0;
url->charset[0] = 0;
url->tag[0] = 0;
url->type = 0;
url->map = 0;
}
Url *copyurl(Url *u){
@ -951,7 +932,7 @@ void freeurl(Url *u){
* get the file at the given url
*/
void geturl(char *urlname, int method, char *body, int cache, int map){
int i, fd;
int i, fd, typ;
char cmd[NNAME];
int pfd[2];
Www *w;
@ -968,18 +949,17 @@ void geturl(char *urlname, int method, char *body, int cache, int map){
break;
}
message("getting %s", selection->fullname);
if(selection->type&COMPRESS)
fd=pipeline("/bin/uncompress", fd);
else if(selection->type&GUNZIP)
typ = snooptype(fd);
if(typ == GUNZIP){
fd=pipeline("/bin/gunzip", fd);
switch(selection->type&~COMPRESSION){
typ = snooptype(fd);
}
switch(typ){
default:
message("Bad type %x in geturl", selection->type);
message("Bad type %x in geturl", typ);
break;
case HTML:
snprint(cmd, sizeof(cmd), selection->charset[0] ?
"/bin/uhtml -c %s" : "/bin/uhtml", selection->charset);
fd = pipeline(cmd, fd);
fd = pipeline("/bin/uhtml", fd);
case PLAIN:
w = www(i = wwwtop++);
if(i >= NWWW){
@ -1005,24 +985,18 @@ void geturl(char *urlname, int method, char *body, int cache, int map){
w->url=copyurl(selection);
w->finished = 0;
w->alldone = 0;
gettext(w, fd, selection->type&~COMPRESSION);
gettext(w, fd, typ);
plinitlist(list, PACKN|FILLX, genwww, 8, doprev);
if(defdisplay) pldraw(list, screen);
setcurrent(i, selection->tag);
break;
case POSTSCRIPT:
case GIF:
case JPEG:
case PNG:
case PDF:
case BMP:
case PAGE:
filter("page -w", fd);
break;
case TIFF:
filter("/sys/lib/mothra/tiffview", fd);
break;
case XBM:
filter("fb/xbm2pic|fb/9v", fd);
break;
}
break;
}

View file

@ -27,9 +27,7 @@ struct Url{
char basename[NNAME];
char reltext[NNAME];
char tag[NNAME];
char charset[NNAME];
int type;
int map; /* is this an image map? */
int map; /* is this an image map? */
};
struct Www{
Url *url;
@ -43,29 +41,15 @@ struct Www{
int alldone; /* page will not change further -- used to adjust cursor */
};
/*
* url reference types -- COMPRESS and GUNZIP are flags that can modify any other type
* Changing these in a non-downward compatible way spoils cache entries
*/
enum{
GIF=1,
HTML,
JPEG,
PIC,
TIFF,
AUDIO,
PLAIN,
XBM,
POSTSCRIPT,
FORWARD,
PDF,
SUFFIX,
ZIP,
HTML,
GIF,
JPEG,
PNG,
COMPRESS=16,
GUNZIP=32,
COMPRESSION=16+32,
BMP,
GUNZIP,
PAGE,
};
/*
@ -102,9 +86,7 @@ void *emalloc(int);
void *emallocz(int, int);
void setbitmap(Rtext *);
void message(char *, ...);
int suffix2type(char *);
int content2type(char *, char *);
int encoding2type(char *);
int snooptype(int fd);
void mkfieldpanel(Rtext *);
void geturl(char *, int, char *, int, int);
char version[];

View file

@ -609,8 +609,6 @@ void plrdhtml(char *name, int fd, Www *dst){
dst->title[0]='\0';
g.spacc=0;
g.form=0;
g.charset[0] = '\0';
strncpy(g.charset, dst->url->charset, sizeof(g.charset));
for(;;) switch(pl_gettoken(&g)){
case TAG:

View file

@ -0,0 +1,65 @@
#include <u.h>
#include <libc.h>
#include <draw.h>
#include <event.h>
#include <panel.h>
#include <ctype.h>
#include "mothra.h"
/*
 * Guess the type of the data on fd from its first bytes.
 *
 * Up to sizeof(buf)-1 bytes are read from fd and matched against a
 * list of markup keywords and magic numbers.  Returns HTML, GUNZIP,
 * JPEG, PNG, GIF, BMP or PAGE on a match, and PLAIN when nothing
 * matches or the initial read fails.
 *
 * Because the sniffed bytes have been consumed, a child process is
 * forked that writes them, followed by the rest of fd, into a pipe;
 * the read end of that pipe is then dup'ed over fd, so the caller
 * sees the stream from its beginning again.  If pipe() or rfork()
 * fails, fd is left as it is and the sniffed bytes are lost to the
 * caller.
 */
int
snooptype(int fd)
{
	int pfd[2], typ, n;
	char buf[1024];

	typ = PLAIN;
	/*
	 * Clear the buffer so that the fixed-length magic comparisons
	 * below never look at uninitialised stack bytes after a short read.
	 */
	memset(buf, 0, sizeof(buf));
	if((n = readn(fd, buf, sizeof(buf)-1)) < 0)
		return typ;
	buf[n] = 0;	/* terminate for the string searches below */

	if(cistrstr(buf, "<?xml") ||
	   cistrstr(buf, "<!DOCTYPE") ||
	   cistrstr(buf, "<HTML"))
		typ = HTML;
	else if(memcmp(buf, "\x1F\x8B", 2) == 0)
		typ = GUNZIP;
	else if(memcmp(buf, "\377\330\377", 3) == 0)
		typ = JPEG;
	/* compare the whole 8-byte PNG signature, not just its first 3 bytes */
	else if(memcmp(buf, "\211PNG\r\n\032\n", 8) == 0)
		typ = PNG;
	else if(memcmp(buf, "GIF", 3) == 0)
		typ = GIF;
	else if(memcmp(buf, "BM", 2) == 0)
		typ = BMP;
	/* everything below is handed to the PAGE viewer */
	else if(memcmp(buf, "PK\x03\x04", 4) == 0)
		typ = PAGE;
	else if(memcmp(buf, "%PDF-", 5) == 0 || strstr(buf, "%!"))
		typ = PAGE;
	else if(memcmp(buf, "x T ", 4) == 0)
		typ = PAGE;
	else if(memcmp(buf, "\xF7\x02\x01\x83\x92\xC0\x1C;", 8) == 0)
		typ = PAGE;
	else if(memcmp(buf, "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", 8) == 0)
		typ = PAGE;
	else if(memcmp(buf, "\111\111\052\000", 4) == 0)	/* "II*\0" */
		typ = PAGE;
	else if(memcmp(buf, "\115\115\000\052", 4) == 0)	/* "MM\0*" */
		typ = PAGE;

	if(pipe(pfd) >= 0){
		switch(rfork(RFFDG|RFPROC|RFNOWAIT)){
		case -1:
			break;
		case 0:
			/* child: replay the sniffed bytes, then copy the rest of fd */
			close(pfd[0]);
			do {
				if(write(pfd[1], buf, n) != n)
					break;
			} while((n = read(fd, buf, sizeof(buf))) > 0);
			exits(nil);
		default:
			/* parent: fd now refers to the read end of the pipe */
			dup(pfd[0], fd);
		}
		close(pfd[1]);
		close(pfd[0]);
	}
	return typ;
}