use file(1) in page and mothra to detect file type

This commit is contained in:
cinap_lenrek 2011-10-04 18:48:31 +02:00
parent b7811b8bc6
commit 3efbb4fa00
4 changed files with 264 additions and 139 deletions

View file

@ -589,7 +589,7 @@ Filemagic long0tab[] = {
0x43614c66, 0xFFFFFFFF, "FLAC audio file\n", OCTET,
0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET,
0x04034B50, 0xFFFFFFFF, "zip archive\n", "application/zip",
070707, 0xFFFF, "cpio archive\n", OCTET,
070707, 0xFFFF, "cpio archive\n", "application/x-cpio",
0x2F7, 0xFFFF, "tex dvi\n", "application/dvi",
0xfaff, 0xfeff, "mp3 audio\n", "audio/mpeg",
0xfeff0000, 0xffffffff, "utf-32be\n", "text/plain charset=utf-32be",
@ -752,8 +752,7 @@ istar(void)
chksum = strtol(hdr->chksum, 0, 8);
if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
if (strcmp(hdr->magic, "ustar") == 0)
print(mime? "application/x-ustar\n":
"posix tar archive\n");
print(mime? "application/x-ustar\n": "posix tar archive\n");
else
print(mime? "application/x-tar\n": "tar archive\n");
return 1;
@ -772,6 +771,9 @@ struct FILE_STRING
char *mime;
} file_string[] =
{
"\x1f\x9d", "compressed", 2, "application/x-compress",
"\x1f\x8b", "gzip compressed", 2, "application/x-gzip",
"BZh", "bzip2 compressed", 3, "application/x-bzip2",
"!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream",
"!<arch>\n", "archive", 8, "application/octet-stream",
"070707", "cpio archive - ascii header", 6, "application/octet-stream",
@ -787,15 +789,19 @@ struct FILE_STRING
"GIF", "GIF image", 3, "image/gif",
"\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript",
"%PDF", "PDF", 4, "application/pdf",
"<html>\n", "HTML file", 7, "text/html",
"<HTML>\n", "HTML file", 7, "text/html",
"<!DOCTYPE", "HTML file", 9, "text/html",
"<!doctype", "HTML file", 9, "text/html",
"<!--", "HTML file", 4, "text/html",
"<html>", "HTML file", 6, "text/html",
"<HTML>", "HTML file", 6, "text/html",
"<?xml", "HTML file", 5, "text/html",
"\111\111\052\000", "tiff", 4, "image/tiff",
"\115\115\000\052", "tiff", 4, "image/tiff",
"\377\330\377\340", "jpeg", 4, "image/jpeg",
"\377\330\377\341", "jpeg", 4, "image/jpeg",
"\377\330\377\333", "jpeg", 4, "image/jpeg",
"BM", "bmp", 2, "image/bmp",
"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/octet-stream",
"BM", "bmp", 2, "image/bmp",
"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/doc",
"<MakerFile ", "FrameMaker file", 11, "application/framemaker",
"\033E\033", "HP PCL printer data", 3, OCTET,
"\033%-12345X", "HPJCL file", 9, "application/hpjcl",
@ -916,21 +922,41 @@ iff(void)
char* html_string[] =
{
"title",
"body",
"?xml",
"!doctype",
"html",
"head",
"title",
"link",
"meta",
"body",
"script",
"strong",
"input",
"table",
"form",
"font",
"div",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"ol",
"ul",
"li",
"dl",
"br",
"hr",
"em",
"th",
"tr",
"td",
"p",
"b",
"i",
"a",
0,
};
@ -952,13 +978,13 @@ ishtml(void)
if(*p == '/')
p++;
q = p;
while(p < buf+nbuf && *p != '>')
while(p < buf+nbuf && isalpha(*p))
p++;
if (p >= buf+nbuf)
break;
for(i = 0; html_string[i]; i++) {
if(cistrncmp(html_string[i], (char*)q, p-q) == 0) {
if(count++ > 4) {
if(++count > 2) {
print(mime ? "text/html\n" : "HTML file\n");
return 1;
}
@ -1145,13 +1171,13 @@ ismung(void)
cs /= 8.;
if(cs <= 24.322) {
if(buf[0]==0x1f && buf[1]==0x9d)
print(mime ? OCTET : "compressed\n");
print(mime ? "application/x-compress" : "compressed\n");
else
if(buf[0]==0x1f && buf[1]==0x8b)
print(mime ? OCTET : "gzip compressed\n");
print(mime ? "application/x-gzip" : "gzip compressed\n");
else
if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
print(mime ? OCTET : "bzip2 compressed\n");
print(mime ? "application/x-bzip2" : "bzip2 compressed\n");
else
print(mime ? OCTET : "encrypted\n");
return 1;

View file

@ -44,11 +44,14 @@ struct Www{
enum{
PLAIN,
HTML,
GIF,
JPEG,
PNG,
BMP,
GUNZIP,
COMPRESS,
PAGE,
};

View file

@ -6,61 +6,122 @@
#include <ctype.h>
#include "mothra.h"
/*
 * Determine the MIME type of the data readable from fd by feeding its
 * first bytes (at most sizeof buf) to "/bin/file -m".
 *
 * The answer is stored NUL-terminated in typ (capacity ntyp) with any
 * trailing newlines removed; typ is left empty when nothing could be
 * read back from file(1).
 *
 * Because sniffing consumes data from fd, fd is replaced (via dup) by
 * the read end of a pipe; a helper process replays the sniffed bytes
 * into that pipe and then copies the remainder of the original fd.
 *
 * Returns 0 on success (including immediate end of file on fd, in
 * which case typ is empty) and -1 if the initial read, a pipe() or the
 * rfork for file(1) fails.
 */
int
filetype(int fd, char *typ, int ntyp)
{
	int ifd[2], ofd[2], xfd[2], n;
	char *argv[3], buf[4096];

	typ[0] = 0;
	if((n = readn(fd, buf, sizeof(buf))) < 0)
		return -1;
	if(n == 0)
		return 0;
	if(pipe(ifd) < 0)
		return -1;
	if(pipe(ofd) < 0){
Err1:
		close(ifd[0]);
		close(ifd[1]);
		return -1;
	}

	/* child 1: file(1) with stdin on ifd[1] and stdout on ofd[1] */
	switch(rfork(RFFDG|RFPROC|RFNOWAIT)){
	case -1:
		close(ofd[0]);
		close(ofd[1]);
		goto Err1;
	case 0:
		dup(ifd[1], 0);
		dup(ofd[1], 1);
		close(ifd[1]);
		close(ifd[0]);
		close(ofd[1]);
		close(ofd[0]);
		close(fd);
		argv[0] = "file";
		argv[1] = "-m";
		argv[2] = 0;
		exec("/bin/file", argv);
		/*
		 * exec returns only on failure.  Terminate here; falling out
		 * of the switch would make this child run the parent's code
		 * below as a duplicate process.
		 */
		exits("exec");
	}
	close(ifd[1]);
	close(ofd[1]);

	/* child 2: hand the sniffed bytes to file(1) without blocking the parent */
	if(rfork(RFFDG|RFPROC|RFNOWAIT) == 0){
		close(fd);
		close(ofd[0]);
		write(ifd[0], buf, n);
		exits(nil);
	}
	close(ifd[0]);

	if(pipe(xfd) < 0){
		close(ofd[0]);
		return -1;
	}

	/* child 3: replay the sniffed bytes, then the rest of fd, into xfd[1] */
	switch(rfork(RFFDG|RFPROC|RFNOWAIT)){
	case -1:
		/* NOTE(review): fd keeps its old identity here, but the sniffed bytes are gone */
		break;
	case 0:
		close(ofd[0]);
		close(xfd[0]);
		do {
			if(write(xfd[1], buf, n) != n)
				break;
		} while((n = read(fd, buf, sizeof(buf))) > 0);
		exits(nil);
	default:
		/* from now on the caller's fd reads from the replay pipe */
		dup(xfd[0], fd);
	}
	close(xfd[0]);
	close(xfd[1]);

	/* collect the verdict of file(1) and strip trailing newlines */
	if((n = readn(ofd[0], typ, ntyp-1)) < 0)
		n = 0;
	close(ofd[0]);
	while(n > 0 && typ[n-1] == '\n')
		n--;
	typ[n] = 0;
	return 0;
}
int
snooptype(int fd)
{
int pfd[2], typ, n;
char buf[1024];
static struct {
char *typ;
int val;
} tab[] = {
"text/plain", PLAIN,
"text/html", HTML,
typ = PLAIN;
if((n = readn(fd, buf, sizeof(buf)-1)) < 0)
return typ;
buf[n] = 0;
if(cistrstr(buf, "<?xml") ||
cistrstr(buf, "<!DOCTYPE") ||
cistrstr(buf, "<HTML") ||
cistrstr(buf, "<head"))
typ = HTML;
else if(memcmp(buf, "\x1F\x8B", 2) == 0)
typ = GUNZIP;
else if(memcmp(buf, "\377\330\377", 3) == 0)
typ = JPEG;
else if(memcmp(buf, "\211PNG\r\n\032\n", 3) == 0)
typ = PNG;
else if(memcmp(buf, "GIF", 3) == 0)
typ = GIF;
else if(memcmp(buf, "BM", 2) == 0)
typ = BMP;
else if(memcmp(buf, "PK\x03\x04", 4) == 0)
typ = PAGE;
else if(memcmp(buf, "%PDF-", 5) == 0 || strstr(buf, "%!"))
typ = PAGE;
else if(memcmp(buf, "x T ", 4) == 0)
typ = PAGE;
else if(memcmp(buf, "\xF7\x02\x01\x83\x92\xC0\x1C;", 8) == 0)
typ = PAGE;
else if(memcmp(buf, "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", 8) == 0)
typ = PAGE;
else if(memcmp(buf, "\111\111\052\000", 4) == 0)
typ = PAGE;
else if(memcmp(buf, "\115\115\000\052", 4) == 0)
typ = PAGE;
if(pipe(pfd) >= 0){
switch(rfork(RFFDG|RFPROC|RFNOWAIT)){
case -1:
break;
case 0:
close(pfd[0]);
do {
if(write(pfd[1], buf, n) != n)
break;
} while((n = read(fd, buf, sizeof(buf))) > 0);
exits(nil);
default:
dup(pfd[0], fd);
}
close(pfd[1]);
close(pfd[0]);
}
return typ;
"image/jpeg", JPEG,
"image/gif", GIF,
"image/png", PNG,
"image/bmp", BMP,
"application/x-gzip", GUNZIP,
"application/x-compress", COMPRESS,
"application/pdf", PAGE,
"application/postscript", PAGE,
"application/ghostscript", PAGE,
"application/troff", PAGE,
"application/zip", PAGE,
"application/x-tar", PAGE,
"application/x-ustar", PAGE,
"image/", PAGE,
"text/", PLAIN,
};
char buf[128];
int i;
if(filetype(fd, buf, sizeof(buf)) < 0)
return -1;
for(i=0; i<nelem(tab); i++)
if(strncmp(buf, tab[i].typ, strlen(tab[i].typ)) == 0)
return tab[i].val;
return -1;
}

View file

@ -144,11 +144,11 @@ resizewin(Point size)
}
int
createtmp(ulong id, char *pfx)
createtmp(char *pfx)
{
static ulong id = 1;
char nam[64];
snprint(nam, sizeof nam, "%s%s%.12d%.8lux", pagespool, pfx, getpid(), id);
snprint(nam, sizeof nam, "%s%s%.12d%.8lux", pagespool, pfx, getpid(), id++);
return create(nam, OEXCL|ORCLOSE|ORDWR, 0600);
}
@ -219,7 +219,7 @@ int
popenfile(Page*);
int
popenconv(Page *p)
popenimg(Page *p)
{
char nam[NPATH];
int fd;
@ -249,6 +249,18 @@ popenconv(Page *p)
return fd;
}
/*
 * Page opener for input that first has to go through a filter command
 * (popenfile's table pairs it with "uncompress", "gunzip" and "bunzip2").
 *
 * Rewinds p->fd and, when p->data names a command, passes the
 * descriptor and that command to pipeline() and clears p->data.  The
 * page is then handed back to popenfile, whose result is returned.
 */
int
popenfilter(Page *p)
{
	char *cmd;

	seek(p->fd, 0, 0);
	cmd = (char*)p->data;
	if(cmd != nil){
		pipeline(p->fd, "%s", cmd);
		p->data = nil;
	}
	p->open = popenfile;
	return popenfile(p);
}
int
popentape(Page *p)
{
@ -529,14 +541,14 @@ popengs(Page *p)
while((n = read(pdat[0], buf, sizeof(buf))) >= 0){
if(ofd >= 0 && (n <= 0 || infernobithdr(buf, n))){
snprint(nam, sizeof nam, "%d", i);
addpage(p, nam, popenconv, nil, ofd);
addpage(p, nam, popenimg, nil, ofd);
ofd = -1;
}
if(n <= 0)
break;
if(ofd < 0){
snprint(nam, sizeof nam, "%.4d", ++i);
if((ofd = createtmp((ulong)p, nam)) < 0)
if((ofd = createtmp(nam)) < 0)
ofd = dup(nullfd, -1);
}
if(write(ofd, buf, n) != n)
@ -552,6 +564,51 @@ Out:
return -1;
}
/*
 * Classify the nbuf bytes at buf by piping them through
 * "/bin/file -m".
 *
 * The MIME type reported by file(1) is stored NUL-terminated in typ
 * (capacity ntyp) with trailing newlines removed; typ is left empty
 * when file(1) produced no output.
 *
 * Returns 0 once the pipes and the file(1) process were set up, and
 * -1 (with typ empty) if pipe() or the rfork for file(1) fails.
 */
int
filetype(char *buf, int nbuf, char *typ, int ntyp)
{
	int n, ifd[2], ofd[2];
	char *argv[3];

	typ[0] = 0;
	if(pipe(ifd) < 0)
		return -1;
	if(pipe(ofd) < 0){
		close(ifd[0]);
		close(ifd[1]);
		return -1;
	}

	/* child 1: file(1) with stdin on ifd[1] and stdout on ofd[1] */
	switch(rfork(RFFDG|RFPROC|RFNOWAIT)){
	case -1:
		close(ifd[0]);
		close(ifd[1]);
		close(ofd[0]);
		close(ofd[1]);
		return -1;
	case 0:
		dup(ifd[1], 0);
		dup(ofd[1], 1);
		close(ifd[1]);
		close(ifd[0]);
		close(ofd[1]);
		close(ofd[0]);
		argv[0] = "file";
		argv[1] = "-m";
		argv[2] = 0;
		exec("/bin/file", argv);
		/*
		 * exec returns only on failure.  Terminate here; otherwise
		 * this child would keep going and return into the caller as
		 * a second copy of the program.
		 */
		exits("exec");
	}
	close(ifd[1]);
	close(ofd[1]);

	/* child 2: feed the sample to file(1) so the parent cannot block on the write */
	if(rfork(RFFDG|RFPROC|RFNOWAIT) == 0){
		close(ofd[0]);
		write(ifd[0], buf, nbuf);
		exits(nil);
	}
	close(ifd[0]);

	/* collect the verdict of file(1) and strip trailing newlines */
	if((n = readn(ofd[0], typ, ntyp-1)) < 0)
		n = 0;
	close(ofd[0]);
	while(n > 0 && typ[n-1] == '\n')
		n--;
	typ[n] = 0;
	return 0;
}
int
dircmp(void *p1, void *p2)
{
@ -566,7 +623,33 @@ dircmp(void *p1, void *p2)
int
popenfile(Page *p)
{
char buf[NBUF], *file;
static struct {
char *typ;
void *popen;
void *data;
} tab[] = {
"application/pdf", popengs, nil,
"application/postscript", popengs, nil,
"application/troff", popengs, "lp -dstdout",
"text/plain", popengs, "lp -dstdout",
"text/html", popengs, "uhtml | html2ms | tbl | troff -ms | lp -dstdout",
"application/dvi", popengs, "dvips -Pps -r0 -q1 -f1",
"application/doc", popengs, "doc2ps",
"application/zip", popentape, "fs/zipfs",
"application/x-tar", popentape, "fs/tarfs",
"application/x-ustar", popentape, "fs/tarfs",
"application/x-compress", popenfilter, "uncompress",
"application/x-gzip", popenfilter, "gunzip",
"application/x-bzip2", popenfilter, "bunzip2",
"image/gif", popenimg, "gif -t9",
"image/jpeg", popenimg, "jpg -t9",
"image/png", popenimg, "png -t9",
"image/ppm", popenimg, "ppm -t9",
"image/bmp", popenimg, "bmp -t9",
"image/p9bit", popenimg, nil,
};
char buf[NBUF], typ[128], *file;
int i, n, fd, tfd;
Dir *d;
@ -610,69 +693,23 @@ popenfile(Page *p)
}
free(d);
memset(buf, 0, 32+1);
if((n = read(fd, buf, 32)) <= 0)
memset(buf, 0, NBUF/2);
if((n = readn(fd, buf, NBUF/2)) <= 0)
goto Err1;
if(infernobithdr(buf, n))
strcpy(typ, "image/p9bit");
else
filetype(buf, n, typ, sizeof(typ));
for(i=0; i<nelem(tab); i++)
if(strncmp(typ, tab[i].typ, strlen(tab[i].typ)) == 0)
break;
if(i == nelem(tab)){
werrstr("unknown image format: %s", typ);
goto Err1;
}
p->fd = fd;
p->data = nil;
p->open = popenconv;
if(memcmp(buf, "%PDF-", 5) == 0 || strstr(buf, "%!"))
p->open = popengs;
else if(memcmp(buf, "x T ", 4) == 0){
p->data = "lp -dstdout";
p->open = popengs;
}
else if(cistrstr(buf, "<?xml") ||
cistrstr(buf, "<!DOCTYPE") ||
cistrstr(buf, "<HTML")){
p->data = "uhtml | html2ms | tbl | troff -ms | lp -dstdout";
p->open = popengs;
}
else if(memcmp(buf, "\xF7\x02\x01\x83\x92\xC0\x1C;", 8) == 0){
p->data = "dvips -Pps -r0 -q1 -f1";
p->open = popengs;
}
else if(memcmp(buf, "\x1F\x8B", 2) == 0){
p->data = "gunzip";
p->open = popengs;
}
else if(memcmp(buf, "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", 8) == 0){
p->data = "doc2ps";
p->open = popengs;
}
else if(memcmp(buf, "PK\x03\x04", 4) == 0){
p->data = "fs/zipfs";
p->open = popentape;
}else if(memcmp(buf, "GIF", 3) == 0)
p->data = "gif -t9";
else if(memcmp(buf, "\111\111\052\000", 4) == 0)
p->data = "fb/tiff2pic | fb/3to1 rgbv | fb/pcp -tplan9";
else if(memcmp(buf, "\115\115\000\052", 4) == 0)
p->data = "fb/tiff2pic | fb/3to1 rgbv | fb/pcp -tplan9";
else if(memcmp(buf, "\377\330\377", 3) == 0)
p->data = "jpg -t9";
else if(memcmp(buf, "\211PNG\r\n\032\n", 3) == 0)
p->data = "png -t9";
else if(memcmp(buf, "\0PC Research, Inc", 17) == 0)
p->data = "aux/g3p9bit -g";
else if(memcmp(buf, "TYPE=ccitt-g31", 14) == 0)
p->data = "aux/g3p9bit -g";
else if(memcmp(buf, "II*", 3) == 0)
p->data = "aux/g3p9bit -g";
else if(memcmp(buf, "TYPE=", 5) == 0)
p->data = "fb/3to1 rgbv |fb/pcp -tplan9";
else if(buf[0] == 'P' && '0' <= buf[1] && buf[1] <= '9')
p->data = "ppm -t9";
else if(memcmp(buf, "BM", 2) == 0)
p->data = "bmp -t9";
else if(infernobithdr(buf, n))
p->data = nil;
else {
werrstr("unknown image format");
goto Err1;
}
p->data = tab[i].data;
p->open = tab[i].popen;
if(seek(fd, 0, 0) < 0)
goto Noseek;
if((i = read(fd, buf+n, n)) < 0)
@ -680,7 +717,7 @@ popenfile(Page *p)
if(i != n || memcmp(buf, buf+n, i)){
n += i;
Noseek:
if((tfd = createtmp((ulong)p, "file")) < 0)
if((tfd = createtmp("file")) < 0)
goto Err1;
while(n > 0){
if(write(tfd, buf, n) != n)
@ -1365,9 +1402,7 @@ main(int argc, char *argv[])
if(s && strcmp(s, "quit")==0)
exits(0);
if(s && strcmp(s, "showdata")==0){
static ulong plumbid;
if((fd = createtmp(plumbid++, "plumb")) < 0){
if((fd = createtmp("plumb")) < 0){
fprint(2, "plumb: createtmp: %r\n");
goto Plumbfree;
}