mirror of
https://github.com/reactos/reactos.git
synced 2024-12-27 01:24:38 +00:00
Speed up 16bpp fills by a factor of 2
svn path=/trunk/; revision=13053
This commit is contained in:
parent
cc5f2d10ce
commit
8d58a5f254
1 changed files with 44 additions and 7 deletions
|
@ -40,15 +40,52 @@ DIB_16BPP_GetPixel(SURFOBJ *SurfObj, LONG x, LONG y)
|
|||
VOID
|
||||
DIB_16BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c)
|
||||
{
|
||||
PBYTE byteaddr = SurfObj->pvScan0 + y * SurfObj->lDelta;
|
||||
PWORD addr = (PWORD)byteaddr + x1;
|
||||
LONG cx = x1;
|
||||
PDWORD addr = (PDWORD)((PWORD)(SurfObj->pvScan0 + y * SurfObj->lDelta) + x1);
|
||||
|
||||
while(cx < x2) {
|
||||
*addr = (WORD)c;
|
||||
++addr;
|
||||
++cx;
|
||||
#ifdef _M_IX86
|
||||
/* This is about 10% faster than the generic C code below */
|
||||
LONG Count = x2 - x1;
|
||||
|
||||
__asm__(
|
||||
" cld\n"
|
||||
" andl $0xffff, %0\n" /* If the pixel value is "abcd", put "abcdabcd" in %eax */
|
||||
" mov %0, %%eax\n"
|
||||
" shl $16, %%eax\n"
|
||||
" or %0, %%eax\n"
|
||||
" test $0x01, %%edi\n" /* Align to fullword boundary */
|
||||
" jz .L1\n"
|
||||
" stosw\n"
|
||||
" dec %1\n"
|
||||
" jz .L2\n"
|
||||
".L1:\n"
|
||||
" mov %1,%%ecx\n" /* Setup count of fullwords to fill */
|
||||
" shr $1,%%ecx\n"
|
||||
" rep stosl\n" /* The actual fill */
|
||||
" test $0x01, %1\n" /* One left to do at the right side? */
|
||||
" jz .L2\n"
|
||||
" stosw\n"
|
||||
".L2:\n"
|
||||
: /* no output */
|
||||
: "r"(c), "r"(Count), "D"(addr)
|
||||
: "%eax", "%ecx");
|
||||
#else /* _M_IX86 */
|
||||
LONG cx = x1;
|
||||
DWORD cc;
|
||||
|
||||
if (0 != (cx & 0x01)) {
|
||||
*((PWORD) addr) = c;
|
||||
cx++;
|
||||
addr = (PDWORD)((PWORD)(addr) + 1);
|
||||
}
|
||||
cc = ((c & 0xffff) << 16) | (c & 0xffff);
|
||||
while(cx + 1 < x2) {
|
||||
*addr++ = cc;
|
||||
cx += 2;
|
||||
}
|
||||
if (cx < x2) {
|
||||
*((PWORD) addr) = c;
|
||||
}
|
||||
#endif /* _M_IX86 */
|
||||
}
|
||||
|
||||
VOID
|
||||
|
|
Loading…
Reference in a new issue