1000+% performance increase in 1bpp dib -> 1bpp dib blitting.

svn path=/trunk/; revision=8824
This commit is contained in:
Royce Mitchell III 2004-03-21 04:17:33 +00:00
parent 6a7304ac01
commit a8f59cffef
3 changed files with 346 additions and 261 deletions

View file

@ -16,7 +16,7 @@
* along with this program; if not, write to the Free Software * along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/ */
/* $Id: dib.c,v 1.6 2004/01/24 11:59:00 navaraf Exp $ */ /* $Id: dib.c,v 1.7 2004/03/21 04:17:33 royce Exp $ */
#include <windows.h> #include <windows.h>
#include <ddk/winddi.h> #include <ddk/winddi.h>
@ -29,7 +29,6 @@
unsigned char notmask[2] = { 0x0f, 0xf0 }; unsigned char notmask[2] = { 0x0f, 0xf0 };
unsigned char altnotmask[2] = { 0xf0, 0x0f }; unsigned char altnotmask[2] = { 0xf0, 0x0f };
unsigned char mask1Bpp[8] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 };
ULONG ULONG
DIB_GetSource(SURFOBJ* SourceSurf, SURFGDI* SourceGDI, ULONG sx, ULONG sy, XLATEOBJ* ColorTranslation) DIB_GetSource(SURFOBJ* SourceSurf, SURFGDI* SourceGDI, ULONG sx, ULONG sy, XLATEOBJ* ColorTranslation)

View file

@ -1,6 +1,6 @@
extern unsigned char notmask[2]; extern unsigned char notmask[2];
extern unsigned char altnotmask[2]; extern unsigned char altnotmask[2];
extern unsigned char mask1Bpp[8]; #define MASK1BPP(x) (1<<(7-((x)&7)))
ULONG DIB_DoRop(ULONG Rop, ULONG Dest, ULONG Source, ULONG Pattern); ULONG DIB_DoRop(ULONG Rop, ULONG Dest, ULONG Source, ULONG Pattern);
ULONG DIB_GetSource(SURFOBJ* SourceSurf, SURFGDI* SourceGDI, ULONG sx, ULONG sy, XLATEOBJ* ColorTranslation); ULONG DIB_GetSource(SURFOBJ* SourceSurf, SURFGDI* SourceGDI, ULONG sx, ULONG sy, XLATEOBJ* ColorTranslation);
@ -93,3 +93,4 @@ BOOLEAN DIB_32BPP_StretchBlt(SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
RECTL* DestRect, RECTL *SourceRect, RECTL* DestRect, RECTL *SourceRect,
POINTL* MaskOrigin, POINTL* BrushOrigin, POINTL* MaskOrigin, POINTL* BrushOrigin,
XLATEOBJ *ColorTranslation, ULONG Mode); XLATEOBJ *ColorTranslation, ULONG Mode);

View file

@ -16,7 +16,7 @@
* along with this program; if not, write to the Free Software * along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/ */
/* $Id: dib1bpp.c,v 1.14 2003/12/08 18:05:30 fireball Exp $ */ /* $Id: dib1bpp.c,v 1.15 2004/03/21 04:17:33 royce Exp $ */
#undef WIN32_LEAN_AND_MEAN #undef WIN32_LEAN_AND_MEAN
#include <windows.h> #include <windows.h>
@ -31,18 +31,12 @@
VOID VOID
DIB_1BPP_PutPixel(PSURFOBJ SurfObj, LONG x, LONG y, ULONG c) DIB_1BPP_PutPixel(PSURFOBJ SurfObj, LONG x, LONG y, ULONG c)
{ {
PBYTE addr = SurfObj->pvScan0; PBYTE addr = SurfObj->pvScan0 + y * SurfObj->lDelta + (x >> 3);
addr += y * SurfObj->lDelta + (x >> 3); if ( !c )
*addr &= ~MASK1BPP(x);
if(c == 0)
{
*addr = (*addr & (~ mask1Bpp[x % 8]));
}
else else
{ *addr |= MASK1BPP(x);
*addr = (*addr | mask1Bpp[x % 8]);
}
} }
ULONG ULONG
@ -50,7 +44,7 @@ DIB_1BPP_GetPixel(PSURFOBJ SurfObj, LONG x, LONG y)
{ {
PBYTE addr = SurfObj->pvScan0 + y * SurfObj->lDelta + (x >> 3); PBYTE addr = SurfObj->pvScan0 + y * SurfObj->lDelta + (x >> 3);
return (*addr & mask1Bpp[x % 8] ? 1 : 0); return (*addr & MASK1BPP(x) ? 1 : 0);
} }
VOID VOID
@ -71,8 +65,174 @@ DIB_1BPP_VLine(PSURFOBJ SurfObj, LONG x, LONG y1, LONG y2, ULONG c)
} }
} }
/*
 * Source-copy blit from a 1bpp surface to a 1bpp surface.
 *
 * Instead of moving one pixel at a time, the destination is processed one
 * byte (8 pixels) wide column at a time: for every destination byte column
 * the whole vertical extent of the rectangle is swept, combining the bits
 * taken from the source with the destination bits that lie outside the
 * rectangle.
 *
 * DestSurf    - destination surface (pvScan0 and lDelta are used)
 * SourceSurf  - source surface (pvScan0 and lDelta are used)
 * DestRect    - destination rectangle; right and bottom are exclusive
 * SourcePoint - source position matching the top-left corner of DestRect
 *
 * The horizontal and vertical sweep directions are picked from the relative
 * position of source and destination; presumably this is what keeps an
 * overlapping blit within a single surface from reading bytes it has
 * already overwritten.
 *
 * An empty or inverted DestRect is a no-op.
 */
static
void
DIB_1BPP_BitBltSrcCopy_From1BPP (
  SURFOBJ* DestSurf, SURFOBJ* SourceSurf,
  PRECTL DestRect, POINTL *SourcePoint )
{
  // the 'window' in this sense is the x-position that corresponds
  // to the left-edge of the 8-pixel byte we are currently working with.
  // dwx is current x-window, dwx2 is the 'last' window we need to process
  int dwx, dwx2; // destination window x-position
  int swx;       // source window x-position (only needed to locate 's')
  // left and right edges of source and dest rectangles
  int dl, dr;    // dest left / dest right (inclusive)
  int sl, sr;    // source left / source right (inclusive)
  // which direction are we going?
  int xinc;
  int yinc;
  // following 3 variables are used for the y-sweep
  int dy1;       // dest y start
  int dy2;       // dest y end
  int sy1;       // src y start
  // number of bits the 16-bit source pair (ps[0]<<8)|ps[1] has to be
  // shifted right to line up with the destination byte
  int shift;
  // 'd' and 's' are the dest & src buffer pointers used on the x-sweep
  PBYTE d;
  PBYTE s;

  // Both sweeps below terminate on an equality test against their last
  // position, so an empty or inverted rectangle would never terminate and
  // would walk off the surface. Nothing to copy in that case anyway.
  if ( DestRect->right <= DestRect->left || DestRect->bottom <= DestRect->top )
    return;

  dl = DestRect->left;
  dr = DestRect->right-1;
  sl = SourcePoint->x;
  sr = sl + dr - dl;

  shift = (dl-sl)&7;

  if ( DestRect->top <= SourcePoint->y )
  {
    // moving up ( scan top -> bottom )
    dy1 = DestRect->top;
    dy2 = DestRect->bottom - 1;
    sy1 = SourcePoint->y;
    yinc = 1;
  }
  else
  {
    // moving down ( scan bottom -> top )
    dy1 = DestRect->bottom - 1;
    dy2 = DestRect->top;
    sy1 = SourcePoint->y + dy1 - dy2;
    yinc = -1;
  }

  if ( DestRect->left <= SourcePoint->x )
  {
    // moving left ( scan left->right )
    dwx = dl&~7;
    swx = (sl-(dl&7))&~7;
    dwx2 = dr&~7;
    xinc = 1;
  }
  else
  {
    // moving right ( scan right->left )
    dwx = dr&~7;
    swx = (sr-(dr&7))&~7; // left edge of the source byte matching dwx
    dwx2 = dl&~7;
    xinc = -1;
  }

  d = &(((PBYTE)DestSurf->pvScan0)[dy1*DestSurf->lDelta + (dwx>>3)]);
  s = &(((PBYTE)SourceSurf->pvScan0)[sy1*SourceSurf->lDelta + (swx>>3)]);

  for ( ;; )
  {
    // 'pd' and 'ps' are the dest & src buffer pointers used on the
    // inner y-sweep of the current byte column
    int dy = dy1;
    PBYTE pd = d;
    PBYTE ps = s;
    BYTE srcmask = 0xff; // dest bits that get replaced by source bits
    BYTE dstmask;        // dest bits that must be preserved

    if ( dwx < dl )
    {
      // window starts left of the rectangle: drop the leading pixels
      int diff = dl-dwx;
      srcmask &= (1<<(8-diff))-1;
    }
    if ( dwx+7 > dr )
    {
      // window ends right of the rectangle: drop the trailing pixels
      int diff = dr-dwx+1;
      srcmask &= ~((1<<(8-diff))-1);
    }
    dstmask = (BYTE)~srcmask;

    // we unfortunately *must* have 4 different versions of the inner
    // loop to be certain we don't try to read from memory that is not
    // needed and may in fact be invalid
    if ( !shift )
    {
      // source and dest are byte-aligned to each other: straight copy
      for ( ;; )
      {
        *pd = (BYTE)((*pd & dstmask) | (*ps & srcmask));

        // this *must* be here, because we could be going up *or* down...
        if ( dy == dy2 )
          break;
        dy += yinc;
        pd += yinc * DestSurf->lDelta;
        ps += yinc * SourceSurf->lDelta;
      }
    }
    else if ( !(0xFF00 & (srcmask<<shift) ) ) // check if ps[0] not needed...
    {
      for ( ;; )
      {
        *pd = (BYTE)((*pd & dstmask)
          | ( ( ps[1] >> shift ) & srcmask ));

        // this *must* be here, because we could be going up *or* down...
        if ( dy == dy2 )
          break;
        dy += yinc;
        pd += yinc * DestSurf->lDelta;
        ps += yinc * SourceSurf->lDelta;
      }
    }
    else if ( !(0xFF & (srcmask<<shift) ) ) // check if ps[1] not needed...
    {
      for ( ;; )
      {
        *pd = (BYTE)((*pd & dstmask)
          | ( ( ps[0] << ( 8 - shift ) ) & srcmask ));

        // this *must* be here, because we could be going up *or* down...
        if ( dy == dy2 )
          break;
        dy += yinc;
        pd += yinc * DestSurf->lDelta;
        ps += yinc * SourceSurf->lDelta;
      }
    }
    else // both ps[0] and ps[1] are needed
    {
      for ( ;; )
      {
        *pd = (BYTE)((*pd & dstmask)
          | ( ( ( (ps[1])|(ps[0]<<8) ) >> shift ) & srcmask ));

        // this *must* be here, because we could be going up *or* down...
        if ( dy == dy2 )
          break;
        dy += yinc;
        pd += yinc * DestSurf->lDelta;
        ps += yinc * SourceSurf->lDelta;
      }
    }

    // this *must* be here, because we could be going right *or* left...
    if ( dwx == dwx2 )
      break;
    d += xinc;
    s += xinc;
    // multiply rather than shift: xinc may be -1 and left-shifting a
    // negative value is undefined
    dwx += xinc * 8;
  }
}
BOOLEAN BOOLEAN
DIB_1BPP_BitBltSrcCopy(SURFOBJ *DestSurf, SURFOBJ *SourceSurf, DIB_1BPP_BitBltSrcCopy(
SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
SURFGDI *DestGDI, SURFGDI *SourceGDI, SURFGDI *DestGDI, SURFGDI *SourceGDI,
PRECTL DestRect, POINTL *SourcePoint, PRECTL DestRect, POINTL *SourcePoint,
XLATEOBJ *ColorTranslation) XLATEOBJ *ColorTranslation)
@ -82,85 +242,7 @@ DIB_1BPP_BitBltSrcCopy(SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
switch ( SourceGDI->BitsPerPixel ) switch ( SourceGDI->BitsPerPixel )
{ {
case 1: case 1:
if (DestRect->top < SourcePoint->y) DIB_1BPP_BitBltSrcCopy_From1BPP ( DestSurf, SourceSurf, DestRect, SourcePoint );
{
for (j = DestRect->top; j < DestRect->bottom; j++)
{
if (DestRect->left < SourcePoint->x)
{
sx = SourcePoint->x;
for (i=DestRect->left; i<DestRect->right; i++)
{
if(DIB_1BPP_GetPixel(SourceSurf, sx, sy) == 0)
{
DIB_1BPP_PutPixel(DestSurf, i, j, 0);
}
else
{
DIB_1BPP_PutPixel(DestSurf, i, j, 1);
}
sx++;
}
}
else
{
sx = SourcePoint->x + DestRect->right - DestRect->left - 1;
for (i = DestRect->right - 1; DestRect->left <= i; i--)
{
if(DIB_1BPP_GetPixel(SourceSurf, sx, sy) == 0)
{
DIB_1BPP_PutPixel(DestSurf, i, j, 0);
}
else
{
DIB_1BPP_PutPixel(DestSurf, i, j, 1);
}
sx--;
}
}
sy++;
}
}
else
{
sy = SourcePoint->y + DestRect->bottom - DestRect->top - 1;
for (j = DestRect->bottom - 1; DestRect->top <= j; j--)
{
if (DestRect->left < SourcePoint->x)
{
sx = SourcePoint->x;
for (i=DestRect->left; i<DestRect->right; i++)
{
if(DIB_1BPP_GetPixel(SourceSurf, sx, sy) == 0)
{
DIB_1BPP_PutPixel(DestSurf, i, j, 0);
}
else
{
DIB_1BPP_PutPixel(DestSurf, i, j, 1);
}
sx++;
}
}
else
{
sx = SourcePoint->x + DestRect->right - DestRect->left - 1;
for (i = DestRect->right - 1; DestRect->left <= i; i--)
{
if(DIB_1BPP_GetPixel(SourceSurf, sx, sy) == 0)
{
DIB_1BPP_PutPixel(DestSurf, i, j, 0);
}
else
{
DIB_1BPP_PutPixel(DestSurf, i, j, 1);
}
sx--;
}
}
sy--;
}
}
break; break;
case 4: case 4:
@ -262,7 +344,8 @@ DIB_1BPP_BitBltSrcCopy(SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
} }
BOOLEAN BOOLEAN
DIB_1BPP_BitBlt(SURFOBJ *DestSurf, SURFOBJ *SourceSurf, DIB_1BPP_BitBlt(
SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
SURFGDI *DestGDI, SURFGDI *SourceGDI, SURFGDI *DestGDI, SURFGDI *SourceGDI,
PRECTL DestRect, POINTL *SourcePoint, PRECTL DestRect, POINTL *SourcePoint,
PBRUSHOBJ Brush, PPOINTL BrushOrigin, PBRUSHOBJ Brush, PPOINTL BrushOrigin,
@ -329,7 +412,9 @@ DIB_1BPP_BitBlt(SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
return TRUE; return TRUE;
} }
BOOLEAN DIB_1BPP_StretchBlt(SURFOBJ *DestSurf, SURFOBJ *SourceSurf, BOOLEAN
DIB_1BPP_StretchBlt (
SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
SURFGDI *DestGDI, SURFGDI *SourceGDI, SURFGDI *DestGDI, SURFGDI *SourceGDI,
RECTL* DestRect, RECTL *SourceRect, RECTL* DestRect, RECTL *SourceRect,
POINTL* MaskOrigin, POINTL* BrushOrigin, POINTL* MaskOrigin, POINTL* BrushOrigin,