webfs: support for internationalized domain name urls

This commit is contained in:
cinap_lenrek 2013-11-24 11:56:33 +01:00
parent 3720b5ab9c
commit f2bd1de5bd
7 changed files with 313 additions and 7 deletions

View file

@ -68,3 +68,7 @@ int debug;
Url *proxy;
int timeout;
char *whitespace;
/*
 * Domlen sizes the scratch buffers used for domain name
 * conversion: it is the byte size of the char buffers handed
 * to idn2utf/utf2idn and the element count of the per-label
 * Rune arrays inside them.
 */
enum {
Domlen = 256,
};

View file

@ -16,6 +16,7 @@ char* unquote(char *s, char **ps);
#pragma varargck type "E" Str2
int Efmt(Fmt*);
int Hfmt(Fmt*);
int Ufmt(Fmt*);
char* Upath(Url *);
Url* url(char *s, Url *b);
@ -23,6 +24,10 @@ Url* saneurl(Url *u);
int matchurl(Url *u, Url *s);
void freeurl(Url *u);
/* idn */
char* idn2utf(char *name, char *buf, int nbuf);
char* utf2idn(char *name, char *buf, int nbuf);
/* buq */
int buread(Buq *q, void *v, int l);
int buwrite(Buq *q, void *v, int l);

View file

@ -765,6 +765,7 @@ main(int argc, char *argv[])
quotefmtinstall();
fmtinstall('U', Ufmt);
fmtinstall('E', Efmt);
fmtinstall('H', Hfmt);
srv = nil;
mtpt = "/mnt/web";

View file

@ -573,7 +573,7 @@ http(char *m, Url *u, Key *shdr, Buq *qbody, Buq *qpost)
ru.path = Upath(u);
ru.query = u->query;
}
n = snprint(buf, sizeof(buf), "%s %U HTTP/1.1\r\nHost: %s%s%s\r\n",
n = snprint(buf, sizeof(buf), "%s %U HTTP/1.1\r\nHost: %H%s%s\r\n",
method, &ru, u->host, u->port ? ":" : "", u->port ? u->port : "");
if(n >= sizeof(buf)-64){
werrstr("request too large");

267
sys/src/cmd/webfs/idn.c Normal file
View file

@ -0,0 +1,267 @@
#include <u.h>
#include <libc.h>
#include <ctype.h>
#include <fcall.h>
#include <thread.h>
#include <9p.h>
#include "dat.h"
#include "fns.h"
/*
 * parameters of the punycode encoding
 * (the values are the ones given in RFC 3492)
 */
enum {
base = 36,	/* number of digit values: a-z (0-25) and 0-9 (26-35) */
tmin = 1,	/* lower clamp of the per-digit threshold t */
tmax = 26,	/* upper clamp of the per-digit threshold t */
skew = 38,	/* used by adapt() when computing the new bias */
damp = 700,	/* divisor applied to delta on the first adaptation */
initial_bias = 72,	/* bias before any code point was processed */
initial_n = 0x80,	/* first code point that is not copied literally */
};
/* largest uint value; the reference point for the overflow checks below */
static uint maxint = ~0;
/*
 * map a punycode digit character to its numeric value:
 * '0'-'9' give 26-35, 'A'-'Z' and 'a'-'z' give 0-25.
 * any other character yields base, which the decoder
 * rejects as an invalid digit.
 */
static uint
decode_digit(uint cp)
{
	uint off;

	/*
	 * the subtraction is unsigned, so a character below the
	 * start of a range wraps to a huge value and a single
	 * compare checks both ends of the range.
	 */
	off = cp - '0';
	if(off < 10)
		return off + 26;
	off = cp - 'A';
	if(off < 26)
		return off;
	off = cp - 'a';
	if(off < 26)
		return off;
	return base;
}
/*
 * map a digit value to its punycode character:
 * 0-25 become letters (upper case when flag is set,
 * lower case otherwise), 26-35 become '0'-'9'.
 */
static char
encode_digit(uint d, int flag)
{
	if(d >= 26)
		return d + ('0' - 26);
	if(flag)
		return d + 'A';
	return d + 'a';
}
/*
 * compute the next bias from the delta just coded.
 * delta is scaled down (by damp the first time, halved
 * afterwards), increased by its share per code point, and
 * then reduced by base - tmin until it drops to the
 * threshold, accumulating base in k for each reduction.
 */
static uint
adapt(uint delta, uint numpoints, int firsttime)
{
	uint k;

	if(firsttime)
		delta /= damp;
	else
		delta >>= 1;
	delta += delta / numpoints;

	k = 0;
	while(delta > ((base - tmin) * tmax) / 2){
		delta /= base - tmin;
		k += base;
	}
	return k + (base - tmin + 1) * delta / (delta + skew);
}
/*
 * encode the input_length runes in input as punycode into output,
 * which has room for max_out characters.
 * returns the number of characters written (output is not NUL
 * terminated here), or -1 when the output does not fit or the
 * arithmetic would overflow a uint.
 */
static int
punyencode(uint input_length, Rune input[], uint max_out, char output[])
{
uint n, delta, h, b, out, bias, j, m, q, k, t;
n = initial_n;
delta = out = 0;
bias = initial_bias;
/* copy the runes below 0x80 literally; give up unless two output slots remain */
for (j = 0; j < input_length; ++j) {
if ((uint)input[j] < 0x80) {
if (max_out - out < 2)
return -1;
output[out++] = input[j];
}
}
/* h counts the runes handled so far, b the ones that were copied literally */
h = b = out;
/* a '-' separates the literal part from the encoded part */
if (b > 0)
output[out++] = '-';
while (h < input_length) {
/* m becomes the smallest rune in the input that is >= n */
for (m = maxint, j = 0; j < input_length; ++j) {
if (input[j] >= n && input[j] < m)
m = input[j];
}
/* refuse if moving n up to m would overflow delta */
if (m - n > (maxint - delta) / (h + 1))
return -1;
delta += (m - n) * (h + 1);
n = m;
for (j = 0; j < input_length; ++j) {
/* runes below n were already handled; each one adds to delta */
if (input[j] < n) {
if (++delta == 0)
return -1;
}
if (input[j] == n) {
/* write delta as a variable-length integer, one digit per iteration */
for (q = delta, k = base;; k += base) {
if (out >= max_out)
return -1;
/* t is the threshold for this digit position, clamped to tmin..tmax */
if (k <= bias)
t = tmin;
else if (k >= bias + tmax)
t = tmax;
else
t = k - bias;
if (q < t)
break;
output[out++] = encode_digit(t + (q - t) % (base - t), 0);
q = (q - t) / (base - t);
}
/* the final digit is flagged when the rune is upper case */
output[out++] = encode_digit(q, isupperrune(input[j]));
bias = adapt(delta, h + 1, h == b);
delta = 0;
++h;
}
}
++delta, ++n;
}
return (int)out;
}
/*
 * decode the input_length punycode characters in input into
 * output, which has room for max_out runes.
 * returns the number of runes produced, or -1 on a non-ASCII
 * literal, an invalid or missing digit, arithmetic overflow,
 * or when the result does not fit.
 */
static int
punydecode(uint input_length, char input[], uint max_out, Rune output[])
{
uint n, out, i, bias, b, j, in, oldi, w, k, digit, t;
n = initial_n;
out = i = 0;
bias = initial_bias;
/* b ends up as the index of the last '-' in the input, or 0 if there is none */
for (b = j = 0; j < input_length; ++j)
if (input[j] == '-')
b = j;
if (b > max_out)
return -1;
/* everything before that '-' is literal and must be 7-bit */
for (j = 0; j < b; ++j) {
if (input[j] & 0x80)
return -1;
output[out++] = input[j];
}
/* the digits start after the '-', or at the very beginning when b is 0 */
for (in = b > 0 ? b + 1 : 0; in < input_length; ++out) {
/* read one variable-length integer and add it to i; w is the weight of the current digit */
for (oldi = i, w = 1, k = base;; k += base) {
if (in >= input_length)
return -1;
digit = decode_digit(input[in++]);
if (digit >= base)
return -1;
if (digit > (maxint - i) / w)
return -1;
i += digit * w;
/* t is the threshold for this digit position, clamped to tmin..tmax */
if (k <= bias)
t = tmin;
else if (k >= bias + tmax)
t = tmax;
else
t = k - bias;
/* a digit below the threshold ends the integer */
if (digit < t)
break;
if (w > maxint / (base - t))
return -1;
w *= (base - t);
}
bias = adapt(i - oldi, out + 1, oldi == 0);
/* i wraps around the out+1 possible positions; every wrap advances n by one */
if (i / (out + 1) > maxint - n)
return -1;
n += i / (out + 1);
i %= (out + 1);
if (out >= max_out)
return -1;
/* open a gap at position i and insert the decoded rune there */
memmove(output + i + 1, output + i, (out - i) * sizeof *output);
/* the case of the last digit read selects the case of the rune */
if(((uint)input[in-1] - 'A') < 26)
output[i++] = toupperrune(n);
else
output[i++] = tolowerrune(n);
}
return (int)out;
}
/*
 * convert punycode encoded internationalized
 * domain name to unicode string
 *
 * name is split into labels at '.'; a label that starts
 * with "xn--" (any case) is punycode decoded, every other
 * label is copied unchanged.  the NUL terminated result is
 * written to buf, which holds nbuf bytes.
 *
 * returns buf, or nil when a label has more than Domlen
 * runes, a label fails to decode, or buf is too small.
 */
char*
idn2utf(char *name, char *buf, int nbuf)
{
	char *dp, *de, *cp;
	Rune rb[Domlen], r;
	int nc, nr, n;

	cp = name;
	dp = buf;
	de = dp+nbuf-1;
	for(;;){
		/* collect one label: nc bytes of input, nr runes in rb */
		nc = nr = 0;
		while(cp[nc] != 0){
			n = chartorune(&r, cp+nc);
			if(r == '.')
				break;
			/* never store past the end of rb */
			if(nr >= nelem(rb))
				return nil;
			rb[nr++] = r;
			nc += n;
		}
		/* a match implies the label has at least the 4 prefix bytes */
		if(cistrncmp(cp, "xn--", 4) == 0)
			if((nr = punydecode(nc-4, cp+4, nelem(rb), rb)) < 0)
				return nil;
		dp = seprint(dp, de, "%.*S", nr, rb);
		/* seprint may hand back nil once no room is left; do not store through it */
		if(dp == nil || dp >= de)
			return nil;
		if(cp[nc] == 0)
			break;
		*dp++ = '.';
		cp += nc+1;
	}
	*dp = 0;
	return buf;
}
/*
 * convert unicode string to punycode
 * encoded internationalized domain name
 *
 * name is split into labels at '.'; a label made only of
 * single byte runes is copied unchanged, any other label
 * is written as "xn--" followed by its punycode encoding.
 * the NUL terminated result is written to buf, which holds
 * nbuf bytes.
 *
 * returns buf, or nil when a label has more than Domlen
 * runes, a label fails to encode, or buf is too small.
 */
char*
utf2idn(char *name, char *buf, int nbuf)
{
	char *dp, *de, *cp;
	Rune rb[Domlen], r;
	int nc, nr, n;

	dp = buf;
	de = dp+nbuf-1;
	cp = name;
	for(;;){
		/* collect one label: nc bytes of input, nr runes in rb */
		nc = nr = 0;
		while(cp[nc] != 0){
			n = chartorune(&r, cp+nc);
			if(r == '.')
				break;
			/*
			 * fail on an oversized label instead of cutting
			 * it short: stopping here would make the code
			 * below treat the middle of the label as its end.
			 */
			if(nr >= nelem(rb))
				return nil;
			rb[nr++] = r;
			nc += n;
		}
		/* byte count equals rune count only when every rune is one byte */
		if(nc == nr)
			dp = seprint(dp, de, "%.*s", nc, cp);
		else {
			dp = seprint(dp, de, "xn--");
			/* seprint may hand back nil once no room is left */
			if(dp == nil)
				return nil;
			if((n = punyencode(nr, rb, de - dp, dp)) < 0)
				return nil;
			dp += n;
		}
		if(dp == nil || dp >= de)
			return nil;
		if(cp[nc] == 0)
			break;
		*dp++ = '.';
		cp += nc+1;
	}
	*dp = 0;
	return buf;
}

View file

@ -3,6 +3,6 @@ BIN=/$objtype/bin
TARG=webfs
HFILES=fns.h dat.h
OFILES=sub.$O url.$O buq.$O http.$O fs.$O
OFILES=sub.$O url.$O buq.$O http.$O fs.$O idn.$O
</sys/src/cmd/mkone

View file

@ -68,6 +68,21 @@ Efmt(Fmt *f)
return 0;
}
/*
 * %H: print the host name passed as char* in its ASCII
 * (punycode) form.  when utf2idn cannot convert the name,
 * the name is printed unchanged.
 * always returns 0.
 */
int
Hfmt(Fmt *f)
{
	char *d, *s;

	s = va_arg(f->args, char*);
	d = emalloc(Domlen);
	/*
	 * keep d pointing at the allocation in both cases so that
	 * it is released below even when the conversion fails.
	 */
	if(utf2idn(s, d, Domlen) != nil)
		s = d;
	fmtprint(f, "%s", s);
	free(d);
	return 0;
}
int
Ufmt(Fmt *f)
{
@ -87,7 +102,7 @@ Ufmt(Fmt *f)
fmtprint(f, "@");
}
if(u->host){
fmtprint(f, strchr(u->host, ':') ? "[%s]" : "%s", u->host);
fmtprint(f, strchr(u->host, ':') ? "[%s]" : "%H", u->host);
if(u->port)
fmtprint(f, ":%s", u->port);
}
@ -184,12 +199,17 @@ pstrdup(char **p)
/*
 * lower-case the UTF-8 string s in place and return s;
 * nil is passed through.
 *
 * a rune is only rewritten when its lower case form takes
 * the same number of bytes as what was read, so the string
 * never changes length and nothing is written beyond the
 * bytes just decoded.  bytes that do not decode to a rune
 * with a distinct lower case form are left untouched.
 */
static char*
mklowcase(char *s)
{
	char *cp;
	Rune r, l;
	int n;

	if(s == nil)
		return s;
	cp = s;
	while(*cp != 0){
		n = chartorune(&r, cp);
		l = tolowerrune(r);
		if(l != r && runelen(l) == n)
			runetochar(cp, &l);
		cp += n;
	}
	return s;
}
@ -299,6 +319,15 @@ Out:
while(s = strchr(s, '+'))
*s++ = ' ';
if(s = u->host){
t = emalloc(Domlen);
if(idn2utf(s, t, Domlen)){
u->host = estrdup(t);
free(s);
}
free(t);
}
unescape(u->user, "");
unescape(u->pass, "");
unescape(u->path, "/");