htmlfmt: use uhtml for character set conversion

This commit is contained in:
cinap_lenrek 2014-05-12 02:38:53 +02:00
parent 66f76c2821
commit 679b092ee0
3 changed files with 34 additions and 38 deletions

View file

@ -28,12 +28,10 @@ struct URLwin
extern char* url;
extern int aflag;
extern int width;
extern int defcharset;
extern char* loadhtml(int);
extern char* readfile(char*, char*, int*);
extern int charset(char*);
extern void* emalloc(ulong);
extern char* estrdup(char*);
extern char* estrstrdup(char*, char*);

View file

@ -285,40 +285,13 @@ rerender(URLwin *u)
free(t);
}
/*
 * Guess the character set of an HTML document from its text s.
 *
 * Somewhat of a hack: this is not a full parse, just case-insensitive
 * string searches for "<meta" and "charset=" in the document.
 *
 * Returns UTF_8 when the value following "charset=" starts with
 * "utf-8" or "utf8" (optionally preceded by a quote character);
 * otherwise returns defcharset, which is set to ISO_8859_1 on first
 * use if it is still 0.
 */
int
charset(char *s)
{
	char *cs;

	if(defcharset == 0)
		defcharset = ISO_8859_1;
	/* no <meta tag at all: nothing to inspect */
	if(cistrstr(s, "<meta") == nil)
		return defcharset;
	cs = cistrstr(s, "charset=");
	if(cs == nil)
		return defcharset;
	cs += 8;	/* skip over "charset=" */
	if(*cs == '"' || *cs == '\'')
		cs++;
	/*
	 * cistrncmp returns 0 on a match; testing its result as a
	 * boolean would accept every charset name as UTF-8.
	 */
	if(cistrncmp(cs, "utf-8", 5) == 0 || cistrncmp(cs, "utf8", 4) == 0)
		return UTF_8;
	return defcharset;
}
void
rendertext(URLwin *u, Bytes *b)
{
Rune *rurl;
rurl = toStr((uchar*)u->url, strlen(u->url), ISO_8859_1);
u->items = parsehtml(b->b, b->n, rurl, u->type, charset((char*)b->b), &u->docinfo);
rurl = toStr((uchar*)u->url, strlen(u->url), UTF_8);
u->items = parsehtml(b->b, b->n, rurl, u->type, UTF_8, &u->docinfo);
// free(rurl);
rerender(u);

View file

@ -8,7 +8,34 @@
char *url = "";	/* initial value: empty string */
int aflag;	/* incremented during option parsing in main */
int width = 70;	/* NOTE(review): presumably the output line width — confirm against its users */
/*
 * NOTE(review): defcharset appears twice below with conflicting types
 * (int and char*); these look like the before/after lines of the same
 * change — only one of them can remain in the compiled file.
 */
int defcharset;
char *defcharset = "latin1";	/* passed as the -c argument to /bin/uhtml */
/*
 * Put a character-set conversion filter in front of fd.
 *
 * A child process is forked with the original fd as its standard
 * input and the write end of a pipe as its standard output; it execs
 * /bin/uhtml with "-c defcharset", falling back to /bin/cat if that
 * exec fails.  In the parent, fd is then replaced (via dup) by the
 * read end of the pipe, so the caller keeps using the same descriptor
 * number.
 *
 * If pipe() or fork() fails, fd is left as it was.
 * Always returns fd.
 */
int
uhtml(int fd)
{
	int pfd[2], pid;

	if(pipe(pfd) < 0)
		return fd;

	pid = fork();
	if(pid == 0){
		/* child: original fd -> stdin, pipe write end -> stdout */
		dup(fd, 0);
		dup(pfd[1], 1);
		close(pfd[1]);
		close(pfd[0]);
		execl("/bin/uhtml", "uhtml", "-c", defcharset, nil);
		/* only reached when the uhtml exec failed */
		execl("/bin/cat", "cat", nil);
		exits("exec");
	}
	if(pid != -1){
		/* parent: make fd refer to the read end of the pipe */
		dup(pfd[0], fd);
	}

	/* the parent no longer needs the original pipe descriptors */
	close(pfd[0]);
	close(pfd[1]);
	return fd;
}
void
usage(void)
@ -21,7 +48,7 @@ void
main(int argc, char *argv[])
{
int i, fd;
char *p, *err, *file;
char *err, *file;
char errbuf[ERRMAX];
ARGBEGIN{
@ -29,9 +56,7 @@ main(int argc, char *argv[])
aflag++;
break;
case 'c':
p = smprint("<meta charset=\"%s\">", EARGF(usage()));
defcharset = charset(p);
free(p);
defcharset = EARGF(usage());
break;
case 'l': case 'w':
err = EARGF(usage());
@ -50,7 +75,7 @@ main(int argc, char *argv[])
err = nil;
file = "<stdin>";
if(argc == 0)
err = loadhtml(0);
err = loadhtml(uhtml(0));
else
for(i=0; err==nil && i<argc; i++){
file = argv[i];
@ -60,7 +85,7 @@ main(int argc, char *argv[])
err = errbuf;
break;
}
err = loadhtml(fd);
err = loadhtml(uhtml(fd));
close(fd);
if(err)
break;