From 141eee9683ee2b61bd59fa6c59752ab090e8d09c Mon Sep 17 00:00:00 2001 From: Timo Kreuzer Date: Thu, 22 Mar 2012 20:12:50 +0000 Subject: [PATCH] [DIBLIB] - Implement special even more optimized versions of SRCCOPY on equal surfaces for 8,16,24 and 32bpp. - Add the right-to-left versions to the function tables - Add _CALCSHIFT macro(s) to calculate the shift in the dib functions, instead of precalculating it in EngBitBlt. This costs us a few bytes per function (yes, with so many functions, every byte counts!) but since it's only for 1 and 4 bpp and improves the code by keeping DIB specific code out of the Eng function, it's reasonable to do so. - Add optional optimization pragmas svn path=/trunk/; revision=56212 --- .../win32/win32k/diblib/BitBlt_SRCCOPY.c | 50 +++++++++++++++++++ .../subsystems/win32/win32k/diblib/DibLib.c | 2 + .../subsystems/win32/win32k/diblib/DibLib.h | 30 ++++++++--- .../win32/win32k/diblib/DibLib_AllSrcBPP.h | 10 +++- .../win32/win32k/diblib/DibLib_BitBlt.h | 13 +++-- .../win32/win32k/diblib/DibLib_interface.h | 8 ++- 6 files changed, 97 insertions(+), 16 deletions(-) diff --git a/reactos/subsystems/win32/win32k/diblib/BitBlt_SRCCOPY.c b/reactos/subsystems/win32/win32k/diblib/BitBlt_SRCCOPY.c index 62ddec51eff..75d691bdd11 100644 --- a/reactos/subsystems/win32/win32k/diblib/BitBlt_SRCCOPY.c +++ b/reactos/subsystems/win32/win32k/diblib/BitBlt_SRCCOPY.c @@ -1,6 +1,56 @@ #include "DibLib.h" +VOID +FASTCALL +Dib_BitBlt_SRCCOPY_EqSurf(PBLTDATA pBltData) +{ + ULONG cLines, cjWidth = pBltData->ulWidth * pBltData->jDstBpp; + PBYTE pjDestBase = pBltData->siDst.pjBase; + PBYTE pjSrcBase = pBltData->siSrc.pjBase; + + /* Loop all lines */ + cLines = pBltData->ulHeight; + while (cLines--) + { + memcpy(pjDestBase, pjSrcBase, cjWidth); + pjDestBase += pBltData->siDst.lDelta; + pjSrcBase += pBltData->siSrc.lDelta; + } +} + +#define Dib_BitBlt_SRCCOPY_S8_D8_EqSurf Dib_BitBlt_SRCCOPY_EqSurf +#define Dib_BitBlt_SRCCOPY_S16_D16_EqSurf Dib_BitBlt_SRCCOPY_EqSurf 
+#define Dib_BitBlt_SRCCOPY_S24_D24_EqSurf Dib_BitBlt_SRCCOPY_EqSurf + +/* special movsd optimization on x86 */ +#if defined(_M_IX86) || defined(_M_AMD64) +VOID +FASTCALL +Dib_BitBlt_SRCCOPY_S32_D32_EqSurf(PBLTDATA pBltData) +{ + ULONG cLines, cRows = pBltData->ulWidth; + PBYTE pjDestBase = pBltData->siDst.pjBase; + PBYTE pjSrcBase = pBltData->siSrc.pjBase; + + /* Loop all lines */ + cLines = pBltData->ulHeight; + while (cLines--) + { + __movsd((PULONG)pjDestBase, (PULONG)pjSrcBase, cRows); + pjDestBase += pBltData->siDst.lDelta; + pjSrcBase += pBltData->siSrc.lDelta; + } +} +#else +#define Dib_BitBlt_SRCCOPY_S32_D32_EqSurf Dib_BitBlt_SRCCOPY_EqSurf +#endif + +#define Dib_BitBlt_SRCCOPY_S8_D8_EqSurf_manual 1 +#define Dib_BitBlt_SRCCOPY_S16_D16_EqSurf_manual 1 +#define Dib_BitBlt_SRCCOPY_S24_D24_EqSurf_manual 1 +#define Dib_BitBlt_SRCCOPY_S32_D32_EqSurf_manual 1 + #define __USES_SOURCE 1 #define __USES_PATTERN 0 #define __USES_DEST 0 diff --git a/reactos/subsystems/win32/win32k/diblib/DibLib.c b/reactos/subsystems/win32/win32k/diblib/DibLib.c index c62e8dcfe52..5a11d574a6e 100644 --- a/reactos/subsystems/win32/win32k/diblib/DibLib.c +++ b/reactos/subsystems/win32/win32k/diblib/DibLib.c @@ -1,6 +1,8 @@ #include "DibLib.h" +BYTE ajShift4[2] = {4, 0}; + enum { INDEX_BitBlt_NOOP, diff --git a/reactos/subsystems/win32/win32k/diblib/DibLib.h b/reactos/subsystems/win32/win32k/diblib/DibLib.h index 50ae83ffabb..59612bdb1cd 100644 --- a/reactos/subsystems/win32/win32k/diblib/DibLib.h +++ b/reactos/subsystems/win32/win32k/diblib/DibLib.h @@ -9,6 +9,21 @@ #include #include #include + +#ifdef _OPTIMIZE_DIBLIB +#ifdef _MSC_VER +#pragma optimize("g", on) +#else +#pragma GCC optimize("O3") +#endif +#endif + +typedef +ULONG +(NTAPI *PFN_XLATE)(XLATEOBJ* pxlo, ULONG ulColor); + +extern BYTE ajShift4[2]; + #include "DibLib_interface.h" #define _DibXlate(pBltData, ulColor) (pBltData->pfnXlate(pBltData->pxlo, ulColor)) @@ -16,51 +31,54 @@ #define __PASTE_(s1,s2) s1##s2 #define 
__PASTE(s1,s2) __PASTE_(s1,s2) -#define __DIB_FUNCTION_NAME_SRCDSTEQ2(name, src_bpp, dst_bpp) Dib_ ## name ## _S ## src_bpp ## _D ## dst_bpp ## _EqSurf -#define __DIB_FUNCTION_NAME_SRCDSTEQ(name, src_bpp, dst_bpp) __DIB_FUNCTION_NAME_SRCDSTEQ2(name, src_bpp, dst_bpp) - -#define __DIB_FUNCTION_NAME_SRCDSTEQR2L2(name, src_bpp, dst_bpp) Dib_ ## name ## _S ## src_bpp ## _D ## dst_bpp ## _EqSurfR2L -#define __DIB_FUNCTION_NAME_SRCDSTEQR2L(name, src_bpp, dst_bpp) __DIB_FUNCTION_NAME_SRCDSTEQR2L2(name, src_bpp, dst_bpp) - #define __DIB_FUNCTION_NAME_SRCDST2(name, src_bpp, dst_bpp) Dib_ ## name ## _S ## src_bpp ## _D ## dst_bpp #define __DIB_FUNCTION_NAME_SRCDST(name, src_bpp, dst_bpp) __DIB_FUNCTION_NAME_SRCDST2(name, src_bpp, dst_bpp) #define __DIB_FUNCTION_NAME_DST2(name, dst_bpp) Dib_ ## name ## _D ## dst_bpp #define __DIB_FUNCTION_NAME_DST(name, src_bpp, dst_bpp) __DIB_FUNCTION_NAME_DST2(name, dst_bpp) +#define __DIB_FUNCTION_NAME_SRCDSTEQ(name, src_bpp, dst_bpp) __DIB_FUNCTION_NAME_SRCDST2(name, src_bpp, dst_bpp) ## _EqSurf +#define __DIB_FUNCTION_NAME_SRCDSTEQL2R(name, src_bpp, dst_bpp) __DIB_FUNCTION_NAME_SRCDST2(name, src_bpp, dst_bpp) ## _EqSurfL2R +#define __DIB_FUNCTION_NAME_SRCDSTEQR2L(name, src_bpp, dst_bpp) __DIB_FUNCTION_NAME_SRCDST2(name, src_bpp, dst_bpp) ## _EqSurfR2L #define _ReadPixel_1(pjSource, jShift) (((*(pjSource)) >> (jShift)) & 1) #define _WritePixel_1(pjDest, jShift, ulColor) (void)(*(pjDest) = (UCHAR)((*(pjDest) & ~(1<<(jShift))) | ((ulColor)<<(jShift)))) #define _NextPixel_1(ppj, pjShift) (void)((*(pjShift))--, *(pjShift) &= 7, (*(ppj) += (*(pjShift) >> 5))) #define _NextPixelR2L_1(ppj, pjShift) (void)((*(ppj) -= (*(pjShift) >> 5)), (*(pjShift))++, *(pjShift) &= 7) #define _SHIFT_1(x) x +#define _CALCSHIFT_1(pShift, x) (void)(*(pShift) = (7 - ((x) & 7))) #define _ReadPixel_4(pjSource, jShift) (((*(pjSource)) >> (jShift)) & 15) #define _WritePixel_4(pjDest, jShift, ulColor) (void)(*(pjDest) = (UCHAR)((*(pjDest) & ~(15<<(jShift))) | 
((ulColor)<<(jShift)))) #define _NextPixel_4(ppj, pjShift) (void)((*(ppj) += (*(pjShift) & 1)), (*(pjShift)) -= 4, *(pjShift) &= 7) #define _NextPixelR2L_4(ppj, pjShift) (void)((*(pjShift)) -= 4, *(pjShift) &= 7, (*(ppj) -= (*(pjShift) & 1))) #define _SHIFT_4(x) x +#define _CALCSHIFT_4(pShift, x) (void)(*(pShift) = ajShift4[(x) & 1]) #define _ReadPixel_8(pjSource, x) (*(UCHAR*)(pjSource)) #define _WritePixel_8(pjDest, x, ulColor) (void)(*(UCHAR*)(pjDest) = (UCHAR)(ulColor)) #define _NextPixel_8(ppj, pjShift) (void)(*(ppj) += 1) #define _NextPixelR2L_8(ppj, pjShift) (void)(*(ppj) -= 1) #define _SHIFT_8(x) +#define _CALCSHIFT_8(pShift, x) #define _ReadPixel_16(pjSource, x) (*(USHORT*)(pjSource)) #define _WritePixel_16(pjDest, x, ulColor) (void)(*(USHORT*)(pjDest) = (USHORT)(ulColor)) #define _NextPixel_16(ppj, pjShift) (void)(*(ppj) -= 2) #define _NextPixelR2L_16(ppj, pjShift) (void)(*(ppj) += 2) #define _SHIFT_16(x) +#define _CALCSHIFT_16(pShift, x) #define _ReadPixel_24(pjSource, x) ((pjSource)[0] | ((pjSource)[1] << 8) | ((pjSource)[2] << 16)) #define _WritePixel_24(pjDest, x, ulColor) (void)(((pjDest)[0] = ((ulColor)&0xFF)),((pjDest)[1] = (((ulColor)>>8)&0xFF)),((pjDest)[2] = (((ulColor)>>16)&0xFF))) #define _NextPixel_24(ppj, pjShift) (void)(*(ppj) -= 3) #define _NextPixelR2L_24(ppj, pjShift) (void)(*(ppj) += 3) #define _SHIFT_24(x) +#define _CALCSHIFT_24(pShift, x) #define _ReadPixel_32(pjSource, x) (*(ULONG*)(pjSource)) #define _WritePixel_32(pjDest, x, ulColor) (void)(*(ULONG*)(pjDest) = (ulColor)) #define _NextPixel_32(ppj, pjShift) (void)(*(ppj) += 4) #define _NextPixelR2L_32(ppj, pjShift) (void)(*(ppj) -= 4) #define _SHIFT_32(x) +#define _CALCSHIFT_32(pShift, x) diff --git a/reactos/subsystems/win32/win32k/diblib/DibLib_AllSrcBPP.h b/reactos/subsystems/win32/win32k/diblib/DibLib_AllSrcBPP.h index b6c03285d31..bdeef613483 100644 --- a/reactos/subsystems/win32/win32k/diblib/DibLib_AllSrcBPP.h +++ b/reactos/subsystems/win32/win32k/diblib/DibLib_AllSrcBPP.h @@ 
-44,7 +44,15 @@ PFN_DIBFUNCTION __PASTE(gapfn, __FUNCTIONNAME)[7][7] = { - {0, 0, 0, 0, 0, 0}, + { + 0, + __DIB_FUNCTION_NAME_SRCDSTEQR2L(__FUNCTIONNAME, 1, 1), + __DIB_FUNCTION_NAME_SRCDSTEQR2L(__FUNCTIONNAME, 4, 4), + __DIB_FUNCTION_NAME_SRCDSTEQR2L(__FUNCTIONNAME, 8, 8), + __DIB_FUNCTION_NAME_SRCDSTEQR2L(__FUNCTIONNAME, 16, 16), + __DIB_FUNCTION_NAME_SRCDSTEQR2L(__FUNCTIONNAME, 24, 24), + __DIB_FUNCTION_NAME_SRCDSTEQR2L(__FUNCTIONNAME, 32, 32), + }, { __DIB_FUNCTION_NAME_SRCDSTEQ(__FUNCTIONNAME, 1, 1), __DIB_FUNCTION_NAME_SRCDST(__FUNCTIONNAME, 1, 1), diff --git a/reactos/subsystems/win32/win32k/diblib/DibLib_BitBlt.h b/reactos/subsystems/win32/win32k/diblib/DibLib_BitBlt.h index ce4f0a40f61..63b9e5d6c97 100644 --- a/reactos/subsystems/win32/win32k/diblib/DibLib_BitBlt.h +++ b/reactos/subsystems/win32/win32k/diblib/DibLib_BitBlt.h @@ -9,6 +9,9 @@ #define _WritePixel(pj, jShift, c) __PASTE(_WritePixel_, _DEST_BPP)(pj, jShift, c) #define _NextPixel(bpp, ppj, pjShift) __PASTE(_NextPixel_, bpp)(ppj, pjShift) #define _SHIFT(bpp, x) __PASTE(_SHIFT_, bpp)(x) +#define _CALCSHIFT(bpp, pshift, x) __PASTE(_CALCSHIFT_, bpp)(pshift, x) + +#if (__PASTE(_DibFunction, _manual) != 1) VOID FASTCALL @@ -42,7 +45,7 @@ _DibFunction(PBLTDATA pBltData) pjPatBase = pBltData->siPat.pjBase; pjPatBase += pBltData->siPat.ptOrig.y * pBltData->siPat.lDelta; pjPattern = pjPatBase + pBltData->siPat.ptOrig.x * _DEST_BPP / 8; - _SHIFT(_DEST_BPP, jPatShift = pBltData->siPat.jShift0;) + _CALCSHIFT(_DEST_BPP, &jPatShift, pBltData->siPat.ptOrig.x); cPatLines = pBltData->ulPatHeight - pBltData->siPat.ptOrig.y; cPatRows = pBltData->ulPatWidth - pBltData->siPat.ptOrig.x; #endif @@ -57,14 +60,14 @@ _DibFunction(PBLTDATA pBltData) { /* Set current bit pointers and shifts */ pjDest = pjDestBase; - _SHIFT(_DEST_BPP, jDstShift = pBltData->siDst.jShift0;) + _CALCSHIFT(_DEST_BPP, &jDstShift, pBltData->siDst.ptOrig.x); #if __USES_SOURCE pjSource = pjSrcBase; - _SHIFT(_SOURCE_BPP, jSrcShift = 
pBltData->siSrc.jShift0;) + _CALCSHIFT(_SOURCE_BPP, &jSrcShift, pBltData->siSrc.ptOrig.x); #endif #if __USES_MASK pjMask = pjMaskBase; - jMskShift = pBltData->siMsk.jShift0; + _CALCSHIFT_1(&jMskShift, pBltData->siMsk.ptOrig.x); #endif /* Loop all rows */ @@ -126,5 +129,7 @@ _DibFunction(PBLTDATA pBltData) } } +#endif // manual + #undef _DibFunction #undef __FUNCTIONNAME2 diff --git a/reactos/subsystems/win32/win32k/diblib/DibLib_interface.h b/reactos/subsystems/win32/win32k/diblib/DibLib_interface.h index 15988f8a7c6..f1e3c0d3cc6 100644 --- a/reactos/subsystems/win32/win32k/diblib/DibLib_interface.h +++ b/reactos/subsystems/win32/win32k/diblib/DibLib_interface.h @@ -1,17 +1,14 @@ #include "RopFunctions.h" -typedef -ULONG -(NTAPI *PFN_XLATE)(XLATEOBJ* pxlo, ULONG ulColor); - typedef struct { ULONG iFormat; + PBYTE pvScan0; PBYTE pjBase; LONG lDelta; POINTL ptOrig; - BYTE jShift0; + BYTE jBpp; } SURFINFO; typedef struct @@ -30,6 +27,7 @@ typedef struct ULONG rop4; PFN_DOROP apfnDoRop[2]; ULONG ulSolidColor; + BYTE jDstBpp; } BLTDATA, *PBLTDATA; typedef