libc: add encode(2) variants for custom alphabets

There are a number of alphabets in common use for base32
and base64 encoding, such as url-safe encodings.

This adds support for passing a function to encode into
arbitrary alphabets.
This commit is contained in:
Ori Bernstein 2021-07-03 20:03:17 +00:00
parent c848ca6267
commit e934530ee4
4 changed files with 99 additions and 31 deletions

View file

@ -385,10 +385,15 @@ extern double charstod(int(*)(void*), void*);
extern char* cleanname(char*); extern char* cleanname(char*);
extern int decrypt(void*, void*, int); extern int decrypt(void*, void*, int);
extern int encrypt(void*, void*, int); extern int encrypt(void*, void*, int);
extern int dec64(uchar*, int, char*, int); extern int dec64(uchar*, int, char*, int);
extern int enc64(char*, int, uchar*, int); extern int enc64(char*, int, uchar*, int);
extern int dec64x(uchar*, int, char*, int, int (*)(int));
extern int enc64x(char*, int, uchar*, int, int (*)(int));
extern int dec32(uchar*, int, char*, int); extern int dec32(uchar*, int, char*, int);
extern int enc32(char*, int, uchar*, int); extern int enc32(char*, int, uchar*, int);
extern int dec32x(uchar*, int, char*, int, int (*)(int));
extern int enc32x(char*, int, uchar*, int, int (*)(int));
extern int dec16(uchar*, int, char*, int); extern int dec16(uchar*, int, char*, int);
extern int enc16(char*, int, uchar*, int); extern int enc16(char*, int, uchar*, int);
extern int dec64chr(int); extern int dec64chr(int);
@ -397,6 +402,7 @@ extern int dec32chr(int);
extern int enc32chr(int); extern int enc32chr(int);
extern int dec16chr(int); extern int dec16chr(int);
extern int enc16chr(int); extern int enc16chr(int);
extern int encodefmt(Fmt*); extern int encodefmt(Fmt*);
extern void exits(char*); extern void exits(char*);
extern double frexp(double, int*); extern double frexp(double, int*);

View file

@ -1,6 +1,7 @@
.TH ENCODE 2 .TH ENCODE 2
.SH NAME .SH NAME
dec64, enc64, dec32, enc32, dec16, enc16, \ dec64, enc64, dec32, enc32, dec16, enc16, \
dec64x, enc64x, dec32x, enc32x, \
dec64chr, enc64chr, dec32chr, enc32chr, dec16chr, enc16chr, \ dec64chr, enc64chr, dec32chr, enc32chr, dec16chr, enc16chr, \
encodefmt \- encoding byte arrays as strings encodefmt \- encoding byte arrays as strings
.SH SYNOPSIS .SH SYNOPSIS
@ -12,15 +13,27 @@ encodefmt \- encoding byte arrays as strings
int dec64(uchar *out, int lim, char *in, int n) int dec64(uchar *out, int lim, char *in, int n)
.PP .PP
.B .B
int dec64x(uchar *out, int lim, char *in, int n, int (*map)(int))
.PP
.B
int enc64(char *out, int lim, uchar *in, int n) int enc64(char *out, int lim, uchar *in, int n)
.PP .PP
.B .B
int enc64x(char *out, int lim, uchar *in, int n, int (*map)(int))
.PP
.B
int dec32(uchar *out, int lim, char *in, int n) int dec32(uchar *out, int lim, char *in, int n)
.PP .PP
.B .B
int dec32x(uchar *out, int lim, char *in, int n, int (*map)(int))
.PP
.B
int enc32(char *out, int lim, uchar *in, int n) int enc32(char *out, int lim, uchar *in, int n)
.PP .PP
.B .B
int enc32x(char *out, int lim, uchar *in, int n, int (*map)(int))
.PP
.B
int dec16(uchar *out, int lim, char *in, int n) int dec16(uchar *out, int lim, char *in, int n)
.PP .PP
.B .B
@ -82,6 +95,31 @@ and
.I enc64chr .I enc64chr
encode a symbol of the alphabet given a value. encode a symbol of the alphabet given a value.
if the value is out of range then zero is returned. if the value is out of range then zero is returned.
.PP
The
.I enc64x
and
.I enc32x
variants are identical to the above, except that they take a
function mapping from an arbitrary index in the alphabet to
the encoded character.
For example, in the following 32-character alphabet,
.EX
.I ABCDEFGHIJKLMNOPQRSTUVWXYZ234567
.EE
the function passed as the final argument would map the value
.I 3
to the character
.IR D .
The
.I dec64x
and
.I dec32x
variants are similar to the above, except that the function passed
maps from a character within the alphabet to its index within
the alphabet.
.PP .PP
.I Encodefmt .I Encodefmt
can be used with can be used with

View file

@ -25,7 +25,7 @@ dec32chr(int c)
} }
int int
dec32(uchar *dest, int ndest, char *src, int nsrc) dec32x(uchar *dest, int ndest, char *src, int nsrc, int (*chr)(int))
{ {
uchar *start; uchar *start;
int i, j, u[8]; int i, j, u[8];
@ -35,7 +35,7 @@ dec32(uchar *dest, int ndest, char *src, int nsrc)
start = dest; start = dest;
while(nsrc>=8){ while(nsrc>=8){
for(i=0; i<8; i++){ for(i=0; i<8; i++){
j = dec32chr(src[i]); j = chr(src[i]);
if(j < 0) if(j < 0)
j = 0; j = 0;
u[i] = j; u[i] = j;
@ -52,7 +52,7 @@ dec32(uchar *dest, int ndest, char *src, int nsrc)
if(nsrc == 1 || nsrc == 3 || nsrc == 6) if(nsrc == 1 || nsrc == 3 || nsrc == 6)
return -1; return -1;
for(i=0; i<nsrc; i++){ for(i=0; i<nsrc; i++){
j = dec32chr(src[i]); j = chr(src[i]);
if(j < 0) if(j < 0)
j = 0; j = 0;
u[i] = j; u[i] = j;
@ -73,7 +73,7 @@ out:
} }
int int
enc32(char *dest, int ndest, uchar *src, int nsrc) enc32x(char *dest, int ndest, uchar *src, int nsrc, int (*chr)(int))
{ {
char *start; char *start;
int j; int j;
@ -83,50 +83,62 @@ enc32(char *dest, int ndest, uchar *src, int nsrc)
start = dest; start = dest;
while(nsrc>=5){ while(nsrc>=5){
j = (0x1f & (src[0]>>3)); j = (0x1f & (src[0]>>3));
*dest++ = enc32chr(j); *dest++ = chr(j);
j = (0x1c & (src[0]<<2)) | (0x03 & (src[1]>>6)); j = (0x1c & (src[0]<<2)) | (0x03 & (src[1]>>6));
*dest++ = enc32chr(j); *dest++ = chr(j);
j = (0x1f & (src[1]>>1)); j = (0x1f & (src[1]>>1));
*dest++ = enc32chr(j); *dest++ = chr(j);
j = (0x10 & (src[1]<<4)) | (0x0f & (src[2]>>4)); j = (0x10 & (src[1]<<4)) | (0x0f & (src[2]>>4));
*dest++ = enc32chr(j); *dest++ = chr(j);
j = (0x1e & (src[2]<<1)) | (0x01 & (src[3]>>7)); j = (0x1e & (src[2]<<1)) | (0x01 & (src[3]>>7));
*dest++ = enc32chr(j); *dest++ = chr(j);
j = (0x1f & (src[3]>>2)); j = (0x1f & (src[3]>>2));
*dest++ = enc32chr(j); *dest++ = chr(j);
j = (0x18 & (src[3]<<3)) | (0x07 & (src[4]>>5)); j = (0x18 & (src[3]<<3)) | (0x07 & (src[4]>>5));
*dest++ = enc32chr(j); *dest++ = chr(j);
j = (0x1f & (src[4])); j = (0x1f & (src[4]));
*dest++ = enc32chr(j); *dest++ = chr(j);
src += 5; src += 5;
nsrc -= 5; nsrc -= 5;
} }
if(nsrc){ if(nsrc){
j = (0x1f & (src[0]>>3)); j = (0x1f & (src[0]>>3));
*dest++ = enc32chr(j); *dest++ = chr(j);
j = (0x1c & (src[0]<<2)); j = (0x1c & (src[0]<<2));
if(nsrc == 1) if(nsrc == 1)
goto out; goto out;
j |= (0x03 & (src[1]>>6)); j |= (0x03 & (src[1]>>6));
*dest++ = enc32chr(j); *dest++ = chr(j);
j = (0x1f & (src[1]>>1)); j = (0x1f & (src[1]>>1));
*dest++ = enc32chr(j); *dest++ = chr(j);
j = (0x10 & (src[1]<<4)); j = (0x10 & (src[1]<<4));
if(nsrc == 2) if(nsrc == 2)
goto out; goto out;
j |= (0x0f & (src[2]>>4)); j |= (0x0f & (src[2]>>4));
*dest++ = enc32chr(j); *dest++ = chr(j);
j = (0x1e & (src[2]<<1)); j = (0x1e & (src[2]<<1));
if(nsrc == 3) if(nsrc == 3)
goto out; goto out;
j |= (0x01 & (src[3]>>7)); j |= (0x01 & (src[3]>>7));
*dest++ = enc32chr(j); *dest++ = chr(j);
j = (0x1f & (src[3]>>2)); j = (0x1f & (src[3]>>2));
*dest++ = enc32chr(j); *dest++ = chr(j);
j = (0x18 & (src[3]<<3)); j = (0x18 & (src[3]<<3));
out: out:
*dest++ = enc32chr(j); *dest++ = chr(j);
} }
*dest = 0; *dest = 0;
return dest-start; return dest-start;
} }
/*
 * enc32 is the fixed-alphabet entry point: it forwards all of its
 * arguments unchanged to enc32x, supplying enc32chr as the function
 * that turns each 5-bit value into an output character, and returns
 * whatever enc32x returns.
 */
int
enc32(char *dest, int ndest, uchar *src, int nsrc)
{
return enc32x(dest, ndest, src, nsrc, enc32chr);
}
/*
 * dec32 is the fixed-alphabet entry point: it forwards all of its
 * arguments unchanged to dec32x, supplying dec32chr as the function
 * that turns each input character into its value, and returns
 * whatever dec32x returns.
 */
int
dec32(uchar *dest, int ndest, char *src, int nsrc)
{
return dec32x(dest, ndest, src, nsrc, dec32chr);
}

View file

@ -30,7 +30,7 @@ dec64chr(int c)
} }
int int
dec64(uchar *out, int lim, char *in, int n) dec64x(uchar *out, int lim, char *in, int n, int (*chr)(int))
{ {
ulong b24; ulong b24;
uchar *start = out; uchar *start = out;
@ -40,7 +40,7 @@ dec64(uchar *out, int lim, char *in, int n)
b24 = 0; b24 = 0;
i = 0; i = 0;
while(n-- > 0){ while(n-- > 0){
c = dec64chr(*in++); c = chr(*in++);
if(c < 0) if(c < 0)
continue; continue;
switch(i){ switch(i){
@ -84,7 +84,7 @@ exhausted:
} }
int int
enc64(char *out, int lim, uchar *in, int n) enc64x(char *out, int lim, uchar *in, int n, int (*chr)(int))
{ {
int i; int i;
ulong b24; ulong b24;
@ -97,10 +97,10 @@ enc64(char *out, int lim, uchar *in, int n)
b24 |= *in++; b24 |= *in++;
if(out + 4 >= e) if(out + 4 >= e)
goto exhausted; goto exhausted;
*out++ = enc64chr(b24>>18); *out++ = chr(b24>>18);
*out++ = enc64chr((b24>>12)&0x3f); *out++ = chr((b24>>12)&0x3f);
*out++ = enc64chr((b24>>6)&0x3f); *out++ = chr((b24>>6)&0x3f);
*out++ = enc64chr(b24&0x3f); *out++ = chr(b24&0x3f);
} }
switch(n%3){ switch(n%3){
@ -109,17 +109,17 @@ enc64(char *out, int lim, uchar *in, int n)
b24 |= *in<<8; b24 |= *in<<8;
if(out + 4 >= e) if(out + 4 >= e)
goto exhausted; goto exhausted;
*out++ = enc64chr(b24>>18); *out++ = chr(b24>>18);
*out++ = enc64chr((b24>>12)&0x3f); *out++ = chr((b24>>12)&0x3f);
*out++ = enc64chr((b24>>6)&0x3f); *out++ = chr((b24>>6)&0x3f);
*out++ = '='; *out++ = '=';
break; break;
case 1: case 1:
b24 = *in<<16; b24 = *in<<16;
if(out + 4 >= e) if(out + 4 >= e)
goto exhausted; goto exhausted;
*out++ = enc64chr(b24>>18); *out++ = chr(b24>>18);
*out++ = enc64chr((b24>>12)&0x3f); *out++ = chr((b24>>12)&0x3f);
*out++ = '='; *out++ = '=';
*out++ = '='; *out++ = '=';
break; break;
@ -128,3 +128,15 @@ exhausted:
*out = 0; *out = 0;
return out - start; return out - start;
} }
/*
 * enc64 is the fixed-alphabet entry point: it forwards all of its
 * arguments unchanged to enc64x, supplying enc64chr as the function
 * that turns each 6-bit value into an output character, and returns
 * whatever enc64x returns.
 */
int
enc64(char *out, int lim, uchar *in, int n)
{
return enc64x(out, lim, in, n, enc64chr);
}
/*
 * dec64 is the fixed-alphabet entry point: it forwards all of its
 * arguments unchanged to dec64x, supplying dec64chr as the function
 * that turns each input character into its value, and returns
 * whatever dec64x returns.
 */
int
dec64(uchar *out, int lim, char *in, int n)
{
return dec64x(out, lim, in, n, dec64chr);
}