uhtml: use first match

This commit is contained in:
cinap_lenrek 2012-07-16 05:32:16 +02:00
parent 739c04e572
commit 4c2c62ee96

View file

@@ -39,8 +39,8 @@ attr(char *s, char *a)
continue; continue;
break; break;
} }
if(e - s > 1) if((e - s) > 1)
return smprint("%.*s", (int)(e-s), s); return smprint("%.*s", (int)(e - s), s);
return nil; return nil;
} }
@@ -78,18 +78,21 @@ main(int argc, char *argv[])
if(nbuf == n){ if(nbuf == n){
if(memcmp(p, "\xEF\xBB\xBF", 3)==0){ if(memcmp(p, "\xEF\xBB\xBF", 3)==0){
p += 3; p += 3;
nbuf -= 3;
cset = "utf"; cset = "utf";
break; goto Found;
} }
if(memcmp(p, "\xFE\xFF", 2) == 0){ if(memcmp(p, "\xFE\xFF", 2) == 0){
p += 2; p += 2;
nbuf -= 2;
cset = "unicode-be"; cset = "unicode-be";
break; goto Found;
} }
if(memcmp(p, "\xFF\xFE", 2) == 0){ if(memcmp(p, "\xFF\xFE", 2) == 0){
p += 2; p += 2;
nbuf -= 2;
cset = "unicode-le"; cset = "unicode-le";
break; goto Found;
} }
} }
s = g; s = g;
@@ -114,28 +117,24 @@ main(int argc, char *argv[])
t = *e; t = *e;
*e = 0; *e = 0;
if((a = attr(g, "encoding")) || (a = attr(g, "charset"))){ if((a = attr(g, "encoding")) || (a = attr(g, "charset"))){
cset = a;
*e = t; *e = t;
break; cset = a;
goto Found;
} }
*e = t; *e = t;
s = ++e; s = ++e;
} while(t); } while(t);
} }
nbuf -= p - buf; s = p;
while(s+UTFmax < p+nbuf){
if(cset == nil){ s += chartorune(&r, s);
cset = "utf"; if(r == Runeerror){
s = p; cset = "latin1";
while(s+UTFmax < p+nbuf){ goto Found;
s += chartorune(&r, s);
if(r == Runeerror){
cset = "latin1";
break;
}
} }
} }
cset = "utf";
Found:
if(pflag){ if(pflag){
print("%s\n", cset); print("%s\n", cset);
exits(0); exits(0);