replace urlencode with C version that isn't broken for UTF-8

This commit is contained in:
cinap_lenrek 2012-10-05 23:14:23 +02:00
parent 0c93da13ae
commit 4af5408953
2 changed files with 98 additions and 36 deletions

View file

@ -1,36 +0,0 @@
#!/bin/awk -f
# URL-encode standard input, one line at a time.
# Letters, digits, "." and "-" are copied unchanged, a space becomes "+",
# and every other character becomes "%" followed by two uppercase hex digits.
# If the variable EncodeEOL is true, each line is terminated with the
# encoded newline "%0A" instead of a real newline (the variable is not set
# in this script; presumably it is supplied by the caller, e.g. with -v).
BEGIN {
# We assume an awk implementation that is just plain dumb.
# We will convert a character to its ASCII value with the
# table ord[], and produce two-digit hexadecimal output
# without the printf("%02X") feature.
EOL = "%0A" # "end of line" string (encoded)
# hextab[n] is the hex digit for the value n; split() fills indices 1..15
# and index 0 is added by hand.
split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
hextab [0] = 0
# ord[] maps each one-character string with code 1..255 to its code.
# NOTE: a character that is not in this table looks up an empty entry,
# which evaluates to 0 below and is therefore emitted as "%00".
for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0
}
# Main rule: build the encoded form of the current line in "encoded".
{
encoded = ""
for ( i=1; i<=length ($0); ++i ) {
c = substr ($0, i, 1)
if ( c ~ /[a-zA-Z0-9.-]/ ) {
encoded = encoded c # safe character
} else if ( c == " " ) {
encoded = encoded "+" # special handling
} else {
# unsafe character, encode it as a two-digit hex-number
lo = ord [c] % 16
hi = int (ord [c] / 16);
encoded = encoded "%" hextab [hi] hextab [lo]
}
}
# Emit the line; with EncodeEOL the newline itself is written as "%0A",
# so consecutive input lines are joined into one output line.
if ( EncodeEOL ) {
printf ("%s", encoded EOL)
} else {
print encoded
}
}
END {
# Disabled: would print a final real newline when EncodeEOL is set.
#if ( EncodeEOL ) print ""
}

98
sys/src/cmd/urlencode.c Normal file
View file

@ -0,0 +1,98 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
/* Buffered input; main() attaches it to file descriptor 0 for reading. */
Biobuf bin;
/* Buffered output; main() attaches it to file descriptor 1 for writing. */
Biobuf bout;
/* Set by the -d option: decode %XX escapes instead of encoding. */
int dflag;
/* Hexadecimal digits in lower case, indexed by digit value. */
char hex[] = "0123456789abcdef";
/* Hexadecimal digits in upper case, indexed by digit value; the encoder emits these. */
char Hex[] = "0123456789ABCDEF";
/*
 * Return the numeric value (0-15) of the hexadecimal digit
 * character c, accepting both upper and lower case, or -1 if
 * c is not a hexadecimal digit.
 *
 * The digit ranges are tested directly rather than searched for
 * with strchr: strchr treats the terminating NUL as part of the
 * string, so a lookup of c == 0 would succeed and yield 16, and
 * it converts its argument to char, so values above 255 could
 * alias a real digit.
 */
int
hexdigit(int c)
{
	if('0' <= c && c <= '9')
		return c - '0';
	if('a' <= c && c <= 'f')
		return c - 'a' + 10;
	if('A' <= c && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
/*
 * Print the command synopsis, prefixed with the program name
 * from argv0, on file descriptor 2 and exit with status "usage".
 */
void
usage(void)
{
fprint(2, "Usage: %s [ -d ] [ file ]\n", argv0);
exits("usage");
}
/*
 * Copy bin to bout, undoing URL encoding: each %XX escape (two
 * hexadecimal digits as recognised by hexdigit) becomes the byte
 * it names and '+' becomes a space, the inverse of what
 * urlencode() produces.  Anything that is not a complete escape
 * is copied through literally, including a '%' or "%X" cut short
 * by end of input.
 */
static void
urldecode(void)
{
	int c, c1, c2, x1, x2;

	while((c = Bgetc(&bin)) >= 0){
		if(c == '+'){
			Bputc(&bout, ' ');
			continue;
		}
		if(c != '%'){
			Bputc(&bout, c);
			continue;
		}
		if((c1 = Bgetc(&bin)) < 0){
			/* input ends in a bare '%': keep it */
			Bputc(&bout, c);
			break;
		}
		if((x1 = hexdigit(c1)) < 0){
			/* not an escape: emit '%' and rescan c1 normally */
			Bungetc(&bin);
			Bputc(&bout, c);
			continue;
		}
		if((c2 = Bgetc(&bin)) < 0){
			/* input ends after one digit: keep what was read */
			Bputc(&bout, c);
			Bputc(&bout, c1);
			break;
		}
		if((x2 = hexdigit(c2)) < 0){
			/* not an escape: emit "%X" and rescan c2 normally */
			Bungetc(&bin);
			Bputc(&bout, c);
			Bputc(&bout, c1);
			continue;
		}
		Bputc(&bout, x1<<4 | x2);
	}
}

/*
 * Copy bin to bout with URL encoding applied byte by byte:
 * ASCII letters, digits and the characters /$-_@.!*'(), are
 * copied unchanged, a space becomes '+', and every other byte
 * becomes %XX with upper-case hexadecimal digits.
 */
static void
urlencode(void)
{
	int c;

	while((c = Bgetc(&bin)) >= 0){
		/*
		 * strchr also matches the terminating NUL of the set,
		 * so a zero byte must be excluded explicitly or it
		 * would be written out raw instead of as %00.
		 */
		if((c != 0 && strchr("/$-_@.!*'(),", c) != 0)
		|| ('a' <= c && c <= 'z')
		|| ('A' <= c && c <= 'Z')
		|| ('0' <= c && c <= '9'))
			Bputc(&bout, c);
		else if(c == ' ')
			Bputc(&bout, '+');
		else {
			Bputc(&bout, '%');
			Bputc(&bout, Hex[c>>4]);
			Bputc(&bout, Hex[c&15]);
		}
	}
}

/*
 * urlencode [ -d ] [ file ]
 *
 * Encode (or, with -d, decode) the named file, or standard input
 * when no file is given, writing the result to standard output.
 * More than one file argument or an unknown option is a usage
 * error.
 */
void
main(int argc, char *argv[])
{
	ARGBEGIN {
	case 'd':
		dflag = 1;
		break;
	default:
		usage();
	} ARGEND;

	if(argc > 1)
		usage();
	if(argc == 1){
		/* reopen the file as descriptor 0 so bin can always read fd 0 */
		close(0);
		if(open(*argv, OREAD) < 0)
			sysfatal("%r");
	}
	Binit(&bin, 0, OREAD);
	Binit(&bout, 1, OWRITE);

	if(dflag)
		urldecode();
	else
		urlencode();

	/* a failed flush means output was lost; do not exit successfully */
	if(Bflush(&bout) < 0)
		sysfatal("write: %r");
	exits(0);
}