plan9fox/sys/src/cmd/resample.c
cinap_lenrek e0cf0261d0 resample: improve performance (thanks José Miguel Sánchez García)
Resample is well known for taking a long time to resize an image. This
patch brings an important performance boost (in my test image, time
was reduced from ~2850ms to ~500ms). It does that by extracting FP
multiplication and division out of the innermost loop of
resamplex/resampley.

The results differ slightly from the current implementation: in my
test: ~0.3% of the bytes had a ±2 difference in their value, which I
attribute to rounding errors. I'm personally not concerned with that
deviation, given the performance gains. However, I recommend testing
it just to be sure I didn't overlook anything.

José Miguel Sánchez García
2021-04-25 12:16:40 +02:00

326 lines
6.1 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <u.h>
#include <libc.h>
#include <draw.h>
#include <memdraw.h>
#define K2 7 /* from -.7 to +.7 inclusive, meaning .2 into each adjacent pixel */
#define NK (2*K2+1)
double K[NK];
double
fac(int L)
{
int i, f;
f = 1;
for(i=L; i>1; --i)
f *= i;
return f;
}
/*
* i0(x) is the modified Bessel function, Σ (x/2)^2L / (L!)²
* There are faster ways to calculate this, but we precompute
* into a table so let's keep it simple.
*/
double
i0(double x)
{
double v;
int L;
v = 1.0;
for(L=1; L<10; L++)
v += pow(x/2., 2*L)/pow(fac(L), 2);
return v;
}
double
kaiser(double x, double τ, double α)
{
if(fabs(x) > τ)
return 0.;
return i0(α*sqrt(1-(x*x/(τ*τ))))/i0(α);
}
void
usage(void)
{
fprint(2, "usage: resample [-x xsize] [-y ysize] [imagefile]\n");
fprint(2, "\twhere size is an integer or a percentage in the form 25%%\n");
exits("usage");
}
int
getint(char *s, int *percent)
{
int n;
if(s == nil)
usage();
n = strtol(s, &s, 0);
*percent = *s == '%';
return n;
}
void
resamplex(uchar *in, int off, int d, int inx, uchar *out, int outx)
{
int i, x, k;
double X, xx, v, rat, rato10;
rat = (double)inx/(double)outx;
rato10 = rat/10.;
for(x=0; x<outx; x++){
if(inx == outx){
/* don't resample if size unchanged */
out[off+x*d] = in[off+x*d];
continue;
}
v = 0.0;
X = x*rat;
xx = X + rato10*(-K2);
for(k=-K2; k<=K2; k++){
i = xx;
if(i < 0)
i = 0;
if(i >= inx)
i = inx-1;
v += in[off+i*d] * K[K2+k];
xx += rato10;
}
out[off+x*d] = v;
}
}
void
resampley(uchar **in, int off, int iny, uchar **out, int outy)
{
int y, i, k;
double Y, yy, v, rat, rato10;
rat = (double)iny/(double)outy;
rato10 = rat/10.;
for(y=0; y<outy; y++){
if(iny == outy){
/* don't resample if size unchanged */
out[y][off] = in[y][off];
continue;
}
v = 0.0;
Y = y*rat;
yy = Y + rato10*(-K2);
for(k=-K2; k<=K2; k++){
i = yy;
if(i < 0)
i = 0;
if(i >= iny)
i = iny-1;
v += in[i][off] * K[K2+k];
yy += rato10;
}
out[y][off] = v;
}
}
int
max(int a, int b)
{
if(a > b)
return a;
return b;
}
Memimage*
resample(int xsize, int ysize, Memimage *m)
{
int i, j, d, bpl, nchan;
Memimage *new;
uchar **oscan, **nscan;
new = allocmemimage(Rect(0, 0, xsize, ysize), m->chan);
if(new == nil)
sysfatal("can't allocate new image: %r");
oscan = malloc(Dy(m->r)*sizeof(uchar*));
nscan = malloc(max(ysize, Dy(m->r))*sizeof(uchar*));
if(oscan == nil || nscan == nil)
sysfatal("can't allocate: %r");
/* unload original image into scan lines */
bpl = bytesperline(m->r, m->depth);
for(i=0; i<Dy(m->r); i++){
oscan[i] = malloc(bpl);
if(oscan[i] == nil)
sysfatal("can't allocate: %r");
j = unloadmemimage(m, Rect(m->r.min.x, m->r.min.y+i, m->r.max.x, m->r.min.y+i+1), oscan[i], bpl);
if(j != bpl)
sysfatal("unloadmemimage");
}
/* allocate scan lines for destination. we do y first, so need at least Dy(m->r) lines */
bpl = bytesperline(Rect(0, 0, xsize, Dy(m->r)), m->depth);
for(i=0; i<max(ysize, Dy(m->r)); i++){
nscan[i] = malloc(bpl);
if(nscan[i] == nil)
sysfatal("can't allocate: %r");
}
/* resample in X */
nchan = d = m->depth/8;
if(m->chan == XRGB32)
nchan--;
for(i=0; i<Dy(m->r); i++){
for(j=0; j<nchan; j++)
resamplex(oscan[i], j, d, Dx(m->r), nscan[i], xsize);
free(oscan[i]);
oscan[i] = nscan[i];
nscan[i] = malloc(bpl);
if(nscan[i] == nil)
sysfatal("can't allocate: %r");
}
/* resample in Y */
for(i=0; i<xsize; i++)
for(j=0; j<nchan; j++)
resampley(oscan, d*i+j, Dy(m->r), nscan, ysize);
/* pack data into destination */
bpl = bytesperline(new->r, m->depth);
for(i=0; i<ysize; i++){
j = loadmemimage(new, Rect(0, i, xsize, i+1), nscan[i], bpl);
if(j != bpl)
sysfatal("loadmemimage: %r");
}
return new;
}
void
main(int argc, char *argv[])
{
int i, fd, xsize, ysize, xpercent, ypercent;
Rectangle rparam;
Memimage *m, *new, *t1, *t2;
char *file;
ulong tchan;
double v;
for(i=-K2; i<=K2; i++){
K[K2+i] = kaiser(i/10., K2/10., 4.);
// print("%g %g\n", i/10., K[K2+i]);
}
/* normalize */
v = 0.0;
for(i=0; i<NK; i++)
v += K[i];
for(i=0; i<NK; i++)
K[i] /= v;
memimageinit();
memset(&rparam, 0, sizeof rparam);
xsize = ysize = 0;
xpercent = ypercent = 0;
ARGBEGIN{
case 'a': /* compatibility; equivalent to just -x or -y */
if(xsize != 0 || ysize != 0)
usage();
xsize = getint(ARGF(), &xpercent);
if(xsize <= 0)
usage();
ysize = xsize;
ypercent = xpercent;
break;
case 'x':
if(xsize != 0)
usage();
xsize = getint(ARGF(), &xpercent);
if(xsize <= 0)
usage();
break;
case 'y':
if(ysize != 0)
usage();
ysize = getint(ARGF(), &ypercent);
if(ysize <= 0)
usage();
break;
default:
usage();
}ARGEND
if(xsize == 0 && ysize == 0)
usage();
file = "<stdin>";
fd = 0;
if(argc > 1)
usage();
else if(argc == 1){
file = argv[0];
fd = open(file, OREAD);
if(fd < 0)
sysfatal("can't open %s: %r", file);
}
m = readmemimage(fd);
if(m == nil)
sysfatal("can't read %s: %r", file);
if(xpercent)
xsize = Dx(m->r)*xsize/100;
if(ypercent)
ysize = Dy(m->r)*ysize/100;
if(ysize == 0)
ysize = (xsize * Dy(m->r)) / Dx(m->r);
if(xsize == 0)
xsize = (ysize * Dx(m->r)) / Dy(m->r);
switch(m->chan){
default:
for(tchan = m->chan; tchan; tchan >>= 8)
if(TYPE(tchan) == CAlpha){
tchan = RGBA32;
goto Convert;
}
tchan = RGB24;
goto Convert;
case GREY8:
case RGB24:
case RGBA32:
case ARGB32:
case XRGB32:
new = resample(xsize, ysize, m);
break;
case GREY1:
case GREY2:
case GREY4:
tchan = GREY8;
Convert:
/* use library to convert to byte-per-chan form, then convert back */
t1 = allocmemimage(m->r, tchan);
if(t1 == nil)
sysfatal("can't allocate temporary image: %r");
memimagedraw(t1, t1->r, m, m->r.min, nil, ZP, S);
t2 = resample(xsize, ysize, t1);
freememimage(t1);
new = allocmemimage(Rect(0, 0, xsize, ysize), m->chan);
if(new == nil)
sysfatal("can't allocate new image: %r");
/* should do error diffusion here */
memimagedraw(new, new->r, t2, t2->r.min, nil, ZP, S);
freememimage(t2);
break;
}
assert(new);
if(writememimage(1, new) < 0)
sysfatal("write error on output: %r");
exits(nil);
}