From f4480d1517834a7b64ba65c4fcf1087430ff17f0 Mon Sep 17 00:00:00 2001
From: cinap_lenrek
Date: Sun, 24 Jun 2012 08:36:42 +0200
Subject: [PATCH] mothra/uhtml: properly handle quoting in tags

---
 sys/src/cmd/mothra/rdhtml.c | 16 +++++++++++++---
 sys/src/cmd/uhtml.c         | 22 +++++++++++++++++-----
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/sys/src/cmd/mothra/rdhtml.c b/sys/src/cmd/mothra/rdhtml.c
index b7599a5d1..8e29b46c8 100644
--- a/sys/src/cmd/mothra/rdhtml.c
+++ b/sys/src/cmd/mothra/rdhtml.c
@@ -474,13 +474,23 @@ int lrunetochar(char *p, int v)
  */
 int pl_gettag(Hglob *g){
 	char *tokp;
-	int c;
+	int c, q;
 	tokp=g->token;
 	if((c=pl_nextc(g))=='!' || c=='?')
 		return pl_getcomment(g);
 	pl_putback(g, c);
-	while((c=pl_nextc(g))!=ETAG && c!=EOF)
-		if(tokp < &g->token[NTOKEN-UTFmax-1]) tokp += lrunetochar(tokp, c);
+	q = 0;
+	while((c=pl_nextc(g))!=EOF){
+		if(c == '\'' || c == '"'){
+			if(q == 0)
+				q = c;
+			else if(q == c)
+				q = 0;
+		} else if(c == ETAG && q == 0)
+			break;
+		if(tokp < &g->token[NTOKEN-UTFmax-1])
+			tokp += lrunetochar(tokp, c);
+	}
 	*tokp='\0';
 	if(c==EOF) htmlerror(g->name, g->lineno, "EOF in tag");
 	pl_tagparse(g, g->token);
diff --git a/sys/src/cmd/uhtml.c b/sys/src/cmd/uhtml.c
index f82b06953..9973111dd 100644
--- a/sys/src/cmd/uhtml.c
+++ b/sys/src/cmd/uhtml.c
@@ -47,7 +47,7 @@ attr(char *s, char *a)
 void
 main(int argc, char *argv[])
 {
-	int n, pfd[2], pflag = 0;
+	int n, q, pfd[2], pflag = 0;
 	char *arg[4], *s, *e, *p, *g, *a, t;
 	Rune r;
 
@@ -96,12 +96,24 @@ main(int argc, char *argv[])
 	do {
 		if((s = strchr(s, '<')) == nil)
 			break;
-		g = s;
-		if((e = strchr(++s, '>')) == nil)
-			e = buf+nbuf;
+		q = 0;
+		g = ++s;
+		e = buf+nbuf;
+		while(s < e){
+			if(*s == '\'' || *s == '"'){
+				if(q == 0)
+					q = *s;
+				else if(q == *s)
+					q = 0;
+			} else if(*s == '>' && q == 0){
+				e = s;
+				break;
+			}
+			s++;
+		}
 		t = *e;
 		*e = 0;
-		if((a = attr(s, "encoding")) || (a = attr(s, "charset"))){
+		if((a = attr(g, "encoding")) || (a = attr(g, "charset"))){
 			cset = a;
 			*e = t;
 			break;