use file(1) in page and mothra to detect file type

This commit is contained in:
cinap_lenrek 2011-10-04 18:48:31 +02:00
parent b7811b8bc6
commit 3efbb4fa00
4 changed files with 264 additions and 139 deletions

View file

@ -589,7 +589,7 @@ Filemagic long0tab[] = {
0x43614c66, 0xFFFFFFFF, "FLAC audio file\n", OCTET, 0x43614c66, 0xFFFFFFFF, "FLAC audio file\n", OCTET,
0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET, 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET,
0x04034B50, 0xFFFFFFFF, "zip archive\n", "application/zip", 0x04034B50, 0xFFFFFFFF, "zip archive\n", "application/zip",
070707, 0xFFFF, "cpio archive\n", OCTET, 070707, 0xFFFF, "cpio archive\n", "application/x-cpio",
0x2F7, 0xFFFF, "tex dvi\n", "application/dvi", 0x2F7, 0xFFFF, "tex dvi\n", "application/dvi",
0xfaff, 0xfeff, "mp3 audio\n", "audio/mpeg", 0xfaff, 0xfeff, "mp3 audio\n", "audio/mpeg",
0xfeff0000, 0xffffffff, "utf-32be\n", "text/plain charset=utf-32be", 0xfeff0000, 0xffffffff, "utf-32be\n", "text/plain charset=utf-32be",
@ -752,8 +752,7 @@ istar(void)
chksum = strtol(hdr->chksum, 0, 8); chksum = strtol(hdr->chksum, 0, 8);
if (hdr->name[0] != '\0' && checksum(hp) == chksum) { if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
if (strcmp(hdr->magic, "ustar") == 0) if (strcmp(hdr->magic, "ustar") == 0)
print(mime? "application/x-ustar\n": print(mime? "application/x-ustar\n": "posix tar archive\n");
"posix tar archive\n");
else else
print(mime? "application/x-tar\n": "tar archive\n"); print(mime? "application/x-tar\n": "tar archive\n");
return 1; return 1;
@ -772,6 +771,9 @@ struct FILE_STRING
char *mime; char *mime;
} file_string[] = } file_string[] =
{ {
"\x1f\x9d", "compressed", 2, "application/x-compress",
"\x1f\x8b", "gzip compressed", 2, "application/x-gzip",
"BZh", "bzip2 compressed", 3, "application/x-bzip2",
"!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream", "!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream",
"!<arch>\n", "archive", 8, "application/octet-stream", "!<arch>\n", "archive", 8, "application/octet-stream",
"070707", "cpio archive - ascii header", 6, "application/octet-stream", "070707", "cpio archive - ascii header", 6, "application/octet-stream",
@ -787,15 +789,19 @@ struct FILE_STRING
"GIF", "GIF image", 3, "image/gif", "GIF", "GIF image", 3, "image/gif",
"\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript", "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript",
"%PDF", "PDF", 4, "application/pdf", "%PDF", "PDF", 4, "application/pdf",
"<html>\n", "HTML file", 7, "text/html", "<!DOCTYPE", "HTML file", 9, "text/html",
"<HTML>\n", "HTML file", 7, "text/html", "<!doctype", "HTML file", 9, "text/html",
"<!--", "HTML file", 4, "text/html",
"<html>", "HTML file", 6, "text/html",
"<HTML>", "HTML file", 6, "text/html",
"<?xml", "HTML file", 5, "text/html",
"\111\111\052\000", "tiff", 4, "image/tiff", "\111\111\052\000", "tiff", 4, "image/tiff",
"\115\115\000\052", "tiff", 4, "image/tiff", "\115\115\000\052", "tiff", 4, "image/tiff",
"\377\330\377\340", "jpeg", 4, "image/jpeg", "\377\330\377\340", "jpeg", 4, "image/jpeg",
"\377\330\377\341", "jpeg", 4, "image/jpeg", "\377\330\377\341", "jpeg", 4, "image/jpeg",
"\377\330\377\333", "jpeg", 4, "image/jpeg", "\377\330\377\333", "jpeg", 4, "image/jpeg",
"BM", "bmp", 2, "image/bmp", "BM", "bmp", 2, "image/bmp",
"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/octet-stream", "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/doc",
"<MakerFile ", "FrameMaker file", 11, "application/framemaker", "<MakerFile ", "FrameMaker file", 11, "application/framemaker",
"\033E\033", "HP PCL printer data", 3, OCTET, "\033E\033", "HP PCL printer data", 3, OCTET,
"\033%-12345X", "HPJCL file", 9, "application/hpjcl", "\033%-12345X", "HPJCL file", 9, "application/hpjcl",
@ -916,21 +922,41 @@ iff(void)
char* html_string[] = char* html_string[] =
{ {
"title", "?xml",
"body", "!doctype",
"html",
"head", "head",
"title",
"link",
"meta",
"body",
"script",
"strong", "strong",
"input",
"table",
"form",
"font",
"div",
"h1", "h1",
"h2", "h2",
"h3", "h3",
"h4", "h4",
"h5", "h5",
"h6", "h6",
"ol",
"ul", "ul",
"li", "li",
"dl", "dl",
"br", "br",
"hr",
"em", "em",
"th",
"tr",
"td",
"p",
"b",
"i",
"a",
0, 0,
}; };
@ -952,13 +978,13 @@ ishtml(void)
if(*p == '/') if(*p == '/')
p++; p++;
q = p; q = p;
while(p < buf+nbuf && *p != '>') while(p < buf+nbuf && isalpha(*p))
p++; p++;
if (p >= buf+nbuf) if (p >= buf+nbuf)
break; break;
for(i = 0; html_string[i]; i++) { for(i = 0; html_string[i]; i++) {
if(cistrncmp(html_string[i], (char*)q, p-q) == 0) { if(cistrncmp(html_string[i], (char*)q, p-q) == 0) {
if(count++ > 4) { if(++count > 2) {
print(mime ? "text/html\n" : "HTML file\n"); print(mime ? "text/html\n" : "HTML file\n");
return 1; return 1;
} }
@ -1145,13 +1171,13 @@ ismung(void)
cs /= 8.; cs /= 8.;
if(cs <= 24.322) { if(cs <= 24.322) {
if(buf[0]==0x1f && buf[1]==0x9d) if(buf[0]==0x1f && buf[1]==0x9d)
print(mime ? OCTET : "compressed\n"); print(mime ? "application/x-compress" : "compressed\n");
else else
if(buf[0]==0x1f && buf[1]==0x8b) if(buf[0]==0x1f && buf[1]==0x8b)
print(mime ? OCTET : "gzip compressed\n"); print(mime ? "application/x-gzip" : "gzip compressed\n");
else else
if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h') if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
print(mime ? OCTET : "bzip2 compressed\n"); print(mime ? "application/x-bzip2" : "bzip2 compressed\n");
else else
print(mime ? OCTET : "encrypted\n"); print(mime ? OCTET : "encrypted\n");
return 1; return 1;

View file

@ -44,11 +44,14 @@ struct Www{
enum{ enum{
PLAIN, PLAIN,
HTML, HTML,
GIF, GIF,
JPEG, JPEG,
PNG, PNG,
BMP, BMP,
GUNZIP, GUNZIP,
COMPRESS,
PAGE, PAGE,
}; };

View file

@ -7,60 +7,121 @@
#include "mothra.h" #include "mothra.h"
int int
snooptype(int fd) filetype(int fd, char *typ, int ntyp)
{ {
int pfd[2], typ, n; int ifd[2], ofd[2], xfd[2], n;
char buf[1024]; char *argv[3], buf[4096];
typ = PLAIN; typ[0] = 0;
if((n = readn(fd, buf, sizeof(buf)-1)) < 0) if((n = readn(fd, buf, sizeof(buf))) < 0)
return typ; return -1;
buf[n] = 0; if(n == 0)
if(cistrstr(buf, "<?xml") || return 0;
cistrstr(buf, "<!DOCTYPE") || if(pipe(ifd) < 0)
cistrstr(buf, "<HTML") || return -1;
cistrstr(buf, "<head")) if(pipe(ofd) < 0){
typ = HTML; Err1:
else if(memcmp(buf, "\x1F\x8B", 2) == 0) close(ifd[0]);
typ = GUNZIP; close(ifd[1]);
else if(memcmp(buf, "\377\330\377", 3) == 0) return -1;
typ = JPEG; }
else if(memcmp(buf, "\211PNG\r\n\032\n", 3) == 0) switch(rfork(RFFDG|RFPROC|RFNOWAIT)){
typ = PNG; case -1:
else if(memcmp(buf, "GIF", 3) == 0) close(ofd[0]);
typ = GIF; close(ofd[1]);
else if(memcmp(buf, "BM", 2) == 0) goto Err1;
typ = BMP; case 0:
else if(memcmp(buf, "PK\x03\x04", 4) == 0) dup(ifd[1], 0);
typ = PAGE; dup(ofd[1], 1);
else if(memcmp(buf, "%PDF-", 5) == 0 || strstr(buf, "%!"))
typ = PAGE; close(ifd[1]);
else if(memcmp(buf, "x T ", 4) == 0) close(ifd[0]);
typ = PAGE; close(ofd[1]);
else if(memcmp(buf, "\xF7\x02\x01\x83\x92\xC0\x1C;", 8) == 0) close(ofd[0]);
typ = PAGE; close(fd);
else if(memcmp(buf, "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", 8) == 0)
typ = PAGE; argv[0] = "file";
else if(memcmp(buf, "\111\111\052\000", 4) == 0) argv[1] = "-m";
typ = PAGE; argv[2] = 0;
else if(memcmp(buf, "\115\115\000\052", 4) == 0) exec("/bin/file", argv);
typ = PAGE; }
if(pipe(pfd) >= 0){ close(ifd[1]);
close(ofd[1]);
if(rfork(RFFDG|RFPROC|RFNOWAIT) == 0){
close(fd);
close(ofd[0]);
write(ifd[0], buf, n);
exits(nil);
}
close(ifd[0]);
if(pipe(xfd) < 0){
close(ofd[0]);
return -1;
}
switch(rfork(RFFDG|RFPROC|RFNOWAIT)){ switch(rfork(RFFDG|RFPROC|RFNOWAIT)){
case -1: case -1:
break; break;
case 0: case 0:
close(pfd[0]); close(ofd[0]);
close(xfd[0]);
do { do {
if(write(pfd[1], buf, n) != n) if(write(xfd[1], buf, n) != n)
break; break;
} while((n = read(fd, buf, sizeof(buf))) > 0); } while((n = read(fd, buf, sizeof(buf))) > 0);
exits(nil); exits(nil);
default: default:
dup(pfd[0], fd); dup(xfd[0], fd);
} }
close(pfd[1]); close(xfd[0]);
close(pfd[0]); close(xfd[1]);
if((n = readn(ofd[0], typ, ntyp-1)) < 0)
n = 0;
close(ofd[0]);
while(n > 0 && typ[n-1] == '\n')
n--;
typ[n] = 0;
return 0;
} }
return typ;
int
snooptype(int fd)
{
static struct {
char *typ;
int val;
} tab[] = {
"text/plain", PLAIN,
"text/html", HTML,
"image/jpeg", JPEG,
"image/gif", GIF,
"image/png", PNG,
"image/bmp", BMP,
"application/x-gzip", GUNZIP,
"application/x-compress", COMPRESS,
"application/pdf", PAGE,
"application/postscript", PAGE,
"application/ghostscript", PAGE,
"application/troff", PAGE,
"application/zip", PAGE,
"application/x-tar", PAGE,
"application/x-ustar", PAGE,
"image/", PAGE,
"text/", PLAIN,
};
char buf[128];
int i;
if(filetype(fd, buf, sizeof(buf)) < 0)
return -1;
for(i=0; i<nelem(tab); i++)
if(strncmp(buf, tab[i].typ, strlen(tab[i].typ)) == 0)
return tab[i].val;
return -1;
} }

View file

@ -144,11 +144,11 @@ resizewin(Point size)
} }
int int
createtmp(ulong id, char *pfx) createtmp(char *pfx)
{ {
static ulong id = 1;
char nam[64]; char nam[64];
snprint(nam, sizeof nam, "%s%s%.12d%.8lux", pagespool, pfx, getpid(), id++);
snprint(nam, sizeof nam, "%s%s%.12d%.8lux", pagespool, pfx, getpid(), id);
return create(nam, OEXCL|ORCLOSE|ORDWR, 0600); return create(nam, OEXCL|ORCLOSE|ORDWR, 0600);
} }
@ -219,7 +219,7 @@ int
popenfile(Page*); popenfile(Page*);
int int
popenconv(Page *p) popenimg(Page *p)
{ {
char nam[NPATH]; char nam[NPATH];
int fd; int fd;
@ -249,6 +249,18 @@ popenconv(Page *p)
return fd; return fd;
} }
/*
 * Page opener for data that must first go through a filter command.
 * Rewinds p->fd, hands it to pipeline() together with the command
 * string stored in p->data (if any), clears p->data, and then lets
 * popenfile() take over as the page's open routine.
 * Returns whatever popenfile() returns.
 */
int
popenfilter(Page *p)
{
	char *cmd;

	seek(p->fd, 0, 0);
	cmd = (char*)p->data;
	if(cmd != nil){
		pipeline(p->fd, "%s", cmd);
		/* the command is consumed; do not apply it a second time */
		p->data = nil;
	}
	/* from here on the page is opened like any other file */
	p->open = popenfile;
	return popenfile(p);
}
int int
popentape(Page *p) popentape(Page *p)
{ {
@ -529,14 +541,14 @@ popengs(Page *p)
while((n = read(pdat[0], buf, sizeof(buf))) >= 0){ while((n = read(pdat[0], buf, sizeof(buf))) >= 0){
if(ofd >= 0 && (n <= 0 || infernobithdr(buf, n))){ if(ofd >= 0 && (n <= 0 || infernobithdr(buf, n))){
snprint(nam, sizeof nam, "%d", i); snprint(nam, sizeof nam, "%d", i);
addpage(p, nam, popenconv, nil, ofd); addpage(p, nam, popenimg, nil, ofd);
ofd = -1; ofd = -1;
} }
if(n <= 0) if(n <= 0)
break; break;
if(ofd < 0){ if(ofd < 0){
snprint(nam, sizeof nam, "%.4d", ++i); snprint(nam, sizeof nam, "%.4d", ++i);
if((ofd = createtmp((ulong)p, nam)) < 0) if((ofd = createtmp(nam)) < 0)
ofd = dup(nullfd, -1); ofd = dup(nullfd, -1);
} }
if(write(ofd, buf, n) != n) if(write(ofd, buf, n) != n)
@ -552,6 +564,51 @@ Out:
return -1; return -1;
} }
/*
 * Classify the nbuf bytes at buf by feeding them to "/bin/file -m" and
 * copying the command's output into typ.
 *
 * typ receives at most ntyp-1 bytes, is always NUL-terminated, and has
 * trailing newlines stripped; it is the empty string when nothing could
 * be read from file(1).
 *
 * Returns 0 once the file(1) process has been started (typ may still be
 * empty), or -1 if the pipes or that process could not be created.
 */
int
filetype(char *buf, int nbuf, char *typ, int ntyp)
{
	int n, ifd[2], ofd[2];
	char *argv[3];

	typ[0] = 0;
	if(pipe(ifd) < 0)
		return -1;
	if(pipe(ofd) < 0){
		close(ifd[0]);
		close(ifd[1]);
		return -1;
	}

	/* child 1: run file(1) with the pipes as its stdin and stdout */
	switch(rfork(RFFDG|RFPROC|RFNOWAIT)){
	case -1:
		close(ifd[0]);
		close(ifd[1]);
		close(ofd[0]);
		close(ofd[1]);
		return -1;
	case 0:
		dup(ifd[1], 0);
		dup(ofd[1], 1);

		close(ifd[1]);
		close(ifd[0]);
		close(ofd[1]);
		close(ofd[0]);

		argv[0] = "file";
		argv[1] = "-m";
		argv[2] = 0;
		exec("/bin/file", argv);
		/*
		 * exec returns only on failure; without this the child
		 * would fall through and keep running the caller's code
		 * as a second copy of the program.
		 */
		exits("exec");
	}
	close(ifd[1]);
	close(ofd[1]);

	/*
	 * child 2: write the sample into file(1)'s stdin.
	 * NOTE(review): presumably a separate process so the parent can
	 * read the answer without blocking on a full pipe -- confirm.
	 */
	if(rfork(RFFDG|RFPROC|RFNOWAIT) == 0){
		close(ofd[0]);
		write(ifd[0], buf, nbuf);
		exits(nil);
	}
	close(ifd[0]);

	/* a read error is treated like empty output */
	if((n = readn(ofd[0], typ, ntyp-1)) < 0)
		n = 0;
	close(ofd[0]);
	while(n > 0 && typ[n-1] == '\n')
		n--;
	typ[n] = 0;
	return 0;
}
int int
dircmp(void *p1, void *p2) dircmp(void *p1, void *p2)
{ {
@ -566,7 +623,33 @@ dircmp(void *p1, void *p2)
int int
popenfile(Page *p) popenfile(Page *p)
{ {
char buf[NBUF], *file; static struct {
char *typ;
void *popen;
void *data;
} tab[] = {
"application/pdf", popengs, nil,
"application/postscript", popengs, nil,
"application/troff", popengs, "lp -dstdout",
"text/plain", popengs, "lp -dstdout",
"text/html", popengs, "uhtml | html2ms | tbl | troff -ms | lp -dstdout",
"application/dvi", popengs, "dvips -Pps -r0 -q1 -f1",
"application/doc", popengs, "doc2ps",
"application/zip", popentape, "fs/zipfs",
"application/x-tar", popentape, "fs/tarfs",
"application/x-ustar", popentape, "fs/tarfs",
"application/x-compress", popenfilter, "uncompress",
"application/x-gzip", popenfilter, "gunzip",
"application/x-bzip2", popenfilter, "bunzip2",
"image/gif", popenimg, "gif -t9",
"image/jpeg", popenimg, "jpg -t9",
"image/png", popenimg, "png -t9",
"image/ppm", popenimg, "ppm -t9",
"image/bmp", popenimg, "bmp -t9",
"image/p9bit", popenimg, nil,
};
char buf[NBUF], typ[128], *file;
int i, n, fd, tfd; int i, n, fd, tfd;
Dir *d; Dir *d;
@ -610,69 +693,23 @@ popenfile(Page *p)
} }
free(d); free(d);
memset(buf, 0, 32+1); memset(buf, 0, NBUF/2);
if((n = read(fd, buf, 32)) <= 0) if((n = readn(fd, buf, NBUF/2)) <= 0)
goto Err1; goto Err1;
if(infernobithdr(buf, n))
strcpy(typ, "image/p9bit");
else
filetype(buf, n, typ, sizeof(typ));
for(i=0; i<nelem(tab); i++)
if(strncmp(typ, tab[i].typ, strlen(tab[i].typ)) == 0)
break;
if(i == nelem(tab)){
werrstr("unknown image format: %s", typ);
goto Err1;
}
p->fd = fd; p->fd = fd;
p->data = nil; p->data = tab[i].data;
p->open = popenconv; p->open = tab[i].popen;
if(memcmp(buf, "%PDF-", 5) == 0 || strstr(buf, "%!"))
p->open = popengs;
else if(memcmp(buf, "x T ", 4) == 0){
p->data = "lp -dstdout";
p->open = popengs;
}
else if(cistrstr(buf, "<?xml") ||
cistrstr(buf, "<!DOCTYPE") ||
cistrstr(buf, "<HTML")){
p->data = "uhtml | html2ms | tbl | troff -ms | lp -dstdout";
p->open = popengs;
}
else if(memcmp(buf, "\xF7\x02\x01\x83\x92\xC0\x1C;", 8) == 0){
p->data = "dvips -Pps -r0 -q1 -f1";
p->open = popengs;
}
else if(memcmp(buf, "\x1F\x8B", 2) == 0){
p->data = "gunzip";
p->open = popengs;
}
else if(memcmp(buf, "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", 8) == 0){
p->data = "doc2ps";
p->open = popengs;
}
else if(memcmp(buf, "PK\x03\x04", 4) == 0){
p->data = "fs/zipfs";
p->open = popentape;
}else if(memcmp(buf, "GIF", 3) == 0)
p->data = "gif -t9";
else if(memcmp(buf, "\111\111\052\000", 4) == 0)
p->data = "fb/tiff2pic | fb/3to1 rgbv | fb/pcp -tplan9";
else if(memcmp(buf, "\115\115\000\052", 4) == 0)
p->data = "fb/tiff2pic | fb/3to1 rgbv | fb/pcp -tplan9";
else if(memcmp(buf, "\377\330\377", 3) == 0)
p->data = "jpg -t9";
else if(memcmp(buf, "\211PNG\r\n\032\n", 3) == 0)
p->data = "png -t9";
else if(memcmp(buf, "\0PC Research, Inc", 17) == 0)
p->data = "aux/g3p9bit -g";
else if(memcmp(buf, "TYPE=ccitt-g31", 14) == 0)
p->data = "aux/g3p9bit -g";
else if(memcmp(buf, "II*", 3) == 0)
p->data = "aux/g3p9bit -g";
else if(memcmp(buf, "TYPE=", 5) == 0)
p->data = "fb/3to1 rgbv |fb/pcp -tplan9";
else if(buf[0] == 'P' && '0' <= buf[1] && buf[1] <= '9')
p->data = "ppm -t9";
else if(memcmp(buf, "BM", 2) == 0)
p->data = "bmp -t9";
else if(infernobithdr(buf, n))
p->data = nil;
else {
werrstr("unknown image format");
goto Err1;
}
if(seek(fd, 0, 0) < 0) if(seek(fd, 0, 0) < 0)
goto Noseek; goto Noseek;
if((i = read(fd, buf+n, n)) < 0) if((i = read(fd, buf+n, n)) < 0)
@ -680,7 +717,7 @@ popenfile(Page *p)
if(i != n || memcmp(buf, buf+n, i)){ if(i != n || memcmp(buf, buf+n, i)){
n += i; n += i;
Noseek: Noseek:
if((tfd = createtmp((ulong)p, "file")) < 0) if((tfd = createtmp("file")) < 0)
goto Err1; goto Err1;
while(n > 0){ while(n > 0){
if(write(tfd, buf, n) != n) if(write(tfd, buf, n) != n)
@ -1365,9 +1402,7 @@ main(int argc, char *argv[])
if(s && strcmp(s, "quit")==0) if(s && strcmp(s, "quit")==0)
exits(0); exits(0);
if(s && strcmp(s, "showdata")==0){ if(s && strcmp(s, "showdata")==0){
static ulong plumbid; if((fd = createtmp("plumb")) < 0){
if((fd = createtmp(plumbid++, "plumb")) < 0){
fprint(2, "plumb: createtmp: %r\n"); fprint(2, "plumb: createtmp: %r\n");
goto Plumbfree; goto Plumbfree;
} }