replace urlencode with C version that isn't broken for UTF-8
This commit is contained in:
parent
0c93da13ae
commit
4af5408953
2 changed files with 98 additions and 36 deletions
|
@ -1,36 +0,0 @@
|
|||
#!/bin/awk -f
#
# URL-encode the input, one line at a time.
#
# Letters, digits, "." and "-" are copied unchanged, a blank becomes
# "+", and every other character is written as "%" followed by two
# hexadecimal digits.  When the variable EncodeEOL is true each line is
# terminated with the encoded newline "%0A" instead of a real newline.
#
# Nothing beyond very basic awk is assumed: character codes come from
# the lookup table ord[] and hex digits from hextab[], so
# printf("%02X") is not needed.
#
# NOTE(review): a character that has no entry in ord[] is emitted as
# "%00"; presumably this is what happens to multibyte (UTF-8) input on
# awks whose substr() works on characters rather than bytes -- confirm.

BEGIN {
	EOL = "%0A"		# encoded end-of-line string

	# hextab[0..15] maps a nibble to its hex digit.  split() clears
	# the array first, so index 0 has to be filled in afterwards.
	split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
	hextab [0] = 0

	# ord[ch] is the numeric code of the one-character string ch.
	for ( code = 1; code <= 255; ++code )
		ord [ sprintf ("%c", code) "" ] = code + 0
}

{
	out = ""
	len = length ($0)
	for ( pos = 1; pos <= len; ++pos ) {
		ch = substr ($0, pos, 1)
		if ( ch == " " ) {
			out = out "+"		# blanks get the short form
		} else if ( ch ~ /[a-zA-Z0-9.-]/ ) {
			out = out ch		# safe, copy as is
		} else {
			# everything else: "%" plus high and low nibble
			code = ord [ch]
			out = out "%" hextab [int (code / 16)] hextab [code % 16]
		}
	}

	if ( EncodeEOL )
		printf ("%s", out EOL)
	else
		print out
}

END {
	# intentionally empty
}
|
98
sys/src/cmd/urlencode.c
Normal file
98
sys/src/cmd/urlencode.c
Normal file
|
@ -0,0 +1,98 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
|
||||
/* buffered input and output streams, initialized in main */
Biobuf bin, bout;

/* set by the -d option: decode instead of encode */
int dflag;

/* hexadecimal digits, upper and lower case */
char Hex[] = "0123456789ABCDEF";
char hex[] = "0123456789abcdef";
|
||||
|
||||
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * c is a byte value such as Bgetc returns; both upper and lower
 * case letters are accepted.  Returns 0-15 for a valid digit and
 * -1 for everything else, including NUL and negative (EOF) values.
 *
 * Range comparisons are used instead of strchr() on a digit table
 * because strchr() also matches the table's terminating NUL, which
 * would make a zero byte look like the digit 16.
 */
int
hexdigit(int c)
{
	if('0' <= c && c <= '9')
		return c - '0';
	if('A' <= c && c <= 'F')
		return c - 'A' + 10;
	if('a' <= c && c <= 'f')
		return c - 'a' + 10;
	return -1;
}
|
||||
|
||||
void
|
||||
usage(void)
|
||||
{
|
||||
fprint(2, "Usage: %s [ -d ] [ file ]\n", argv0);
|
||||
exits("usage");
|
||||
}
|
||||
|
||||
void
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int c;
|
||||
|
||||
ARGBEGIN {
|
||||
case 'd':
|
||||
dflag = 1;
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
} ARGEND;
|
||||
if(argc == 1){
|
||||
close(0);
|
||||
if(open(*argv, OREAD) < 0)
|
||||
sysfatal("%r");
|
||||
} else if(argc > 1)
|
||||
usage();
|
||||
|
||||
Binit(&bin, 0, OREAD);
|
||||
Binit(&bout, 1, OWRITE);
|
||||
|
||||
if(dflag){
|
||||
while((c = Bgetc(&bin)) >= 0){
|
||||
if(c == '%'){
|
||||
int c1, c2, x1, x2;
|
||||
|
||||
if((c1 = Bgetc(&bin)) < 0)
|
||||
break;
|
||||
if((x1 = hexdigit(c1)) < 0){
|
||||
Bungetc(&bin);
|
||||
Bputc(&bout, c);
|
||||
continue;
|
||||
}
|
||||
if((c2 = Bgetc(&bin)) < 0)
|
||||
break;
|
||||
if((x2 = hexdigit(c2)) < 0){
|
||||
Bungetc(&bin);
|
||||
Bputc(&bout, c);
|
||||
Bputc(&bout, c1);
|
||||
continue;
|
||||
}
|
||||
c = x1<<4 | x2;
|
||||
}
|
||||
Bputc(&bout, c);
|
||||
}
|
||||
} else {
|
||||
while((c = Bgetc(&bin)) >= 0){
|
||||
if(strchr("/$-_@.!*'(),", c)
|
||||
|| 'a'<=c && c<='z'
|
||||
|| 'A'<=c && c<='Z'
|
||||
|| '0'<=c && c<='9')
|
||||
Bputc(&bout, c);
|
||||
else if(c == ' ')
|
||||
Bputc(&bout, '+');
|
||||
else {
|
||||
Bputc(&bout, '%');
|
||||
Bputc(&bout, Hex[c>>4]);
|
||||
Bputc(&bout, Hex[c&15]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Bflush(&bout);
|
||||
exits(0);
|
||||
}
|
Loading…
Reference in a new issue