htmlfmt: use uhtml for character set conversion
This commit is contained in:
parent
66f76c2821
commit
679b092ee0
3 changed files with 34 additions and 38 deletions
|
@ -28,12 +28,10 @@ struct URLwin
|
|||
extern char* url;
|
||||
extern int aflag;
|
||||
extern int width;
|
||||
extern int defcharset;
|
||||
|
||||
extern char* loadhtml(int);
|
||||
|
||||
extern char* readfile(char*, char*, int*);
|
||||
extern int charset(char*);
|
||||
extern void* emalloc(ulong);
|
||||
extern char* estrdup(char*);
|
||||
extern char* estrstrdup(char*, char*);
|
||||
|
|
|
@ -285,40 +285,13 @@ rerender(URLwin *u)
|
|||
free(t);
|
||||
}
|
||||
|
||||
/*
|
||||
* Somewhat of a hack. Not a full parse, just looks for strings in the beginning
|
||||
* of the document (cistrstr only looks at first somewhat bytes).
|
||||
*/
|
||||
int
|
||||
charset(char *s)
|
||||
{
|
||||
char *meta, *emeta, *charset;
|
||||
|
||||
if(defcharset == 0)
|
||||
defcharset = ISO_8859_1;
|
||||
meta = cistrstr(s, "<meta");
|
||||
if(meta == nil)
|
||||
return defcharset;
|
||||
for(emeta=meta; *emeta!='>' && *emeta!='\0'; emeta++)
|
||||
;
|
||||
charset = cistrstr(s, "charset=");
|
||||
if(charset == nil)
|
||||
return defcharset;
|
||||
charset += 8;
|
||||
if(*charset == '"')
|
||||
charset++;
|
||||
if(cistrncmp(charset, "utf-8", 5) || cistrncmp(charset, "utf8", 4))
|
||||
return UTF_8;
|
||||
return defcharset;
|
||||
}
|
||||
|
||||
void
|
||||
rendertext(URLwin *u, Bytes *b)
|
||||
{
|
||||
Rune *rurl;
|
||||
|
||||
rurl = toStr((uchar*)u->url, strlen(u->url), ISO_8859_1);
|
||||
u->items = parsehtml(b->b, b->n, rurl, u->type, charset((char*)b->b), &u->docinfo);
|
||||
rurl = toStr((uchar*)u->url, strlen(u->url), UTF_8);
|
||||
u->items = parsehtml(b->b, b->n, rurl, u->type, UTF_8, &u->docinfo);
|
||||
// free(rurl);
|
||||
|
||||
rerender(u);
|
||||
|
|
|
@ -8,7 +8,34 @@
|
|||
char *url = "";
|
||||
int aflag;
|
||||
int width = 70;
|
||||
int defcharset;
|
||||
char *defcharset = "latin1";
|
||||
|
||||
int
|
||||
uhtml(int fd)
|
||||
{
|
||||
int p[2];
|
||||
|
||||
if(pipe(p) < 0)
|
||||
return fd;
|
||||
switch(fork()){
|
||||
case -1:
|
||||
break;
|
||||
case 0:
|
||||
dup(fd, 0);
|
||||
dup(p[1], 1);
|
||||
close(p[1]);
|
||||
close(p[0]);
|
||||
execl("/bin/uhtml", "uhtml", "-c", defcharset, nil);
|
||||
execl("/bin/cat", "cat", nil);
|
||||
exits("exec");
|
||||
default:
|
||||
dup(p[0], fd);
|
||||
break;
|
||||
}
|
||||
close(p[0]);
|
||||
close(p[1]);
|
||||
return fd;
|
||||
}
|
||||
|
||||
void
|
||||
usage(void)
|
||||
|
@ -21,7 +48,7 @@ void
|
|||
main(int argc, char *argv[])
|
||||
{
|
||||
int i, fd;
|
||||
char *p, *err, *file;
|
||||
char *err, *file;
|
||||
char errbuf[ERRMAX];
|
||||
|
||||
ARGBEGIN{
|
||||
|
@ -29,9 +56,7 @@ main(int argc, char *argv[])
|
|||
aflag++;
|
||||
break;
|
||||
case 'c':
|
||||
p = smprint("<meta charset=\"%s\">", EARGF(usage()));
|
||||
defcharset = charset(p);
|
||||
free(p);
|
||||
defcharset = EARGF(usage());
|
||||
break;
|
||||
case 'l': case 'w':
|
||||
err = EARGF(usage());
|
||||
|
@ -50,7 +75,7 @@ main(int argc, char *argv[])
|
|||
err = nil;
|
||||
file = "<stdin>";
|
||||
if(argc == 0)
|
||||
err = loadhtml(0);
|
||||
err = loadhtml(uhtml(0));
|
||||
else
|
||||
for(i=0; err==nil && i<argc; i++){
|
||||
file = argv[i];
|
||||
|
@ -60,7 +85,7 @@ main(int argc, char *argv[])
|
|||
err = errbuf;
|
||||
break;
|
||||
}
|
||||
err = loadhtml(fd);
|
||||
err = loadhtml(uhtml(fd));
|
||||
close(fd);
|
||||
if(err)
|
||||
break;
|
||||
|
|
Loading…
Reference in a new issue