for 24bpp

1. Remove the inline asm for HLine and implement an asm version of it in Intel syntax.
   (This version could be optimized further, but it will do for now.)
2. Set svn:eol-style on the files I forgot.



svn path=/trunk/; revision=23732
This commit is contained in:
Magnus Olsen 2006-08-26 23:26:39 +00:00
parent c5edc7e6d3
commit 6518b5361d
6 changed files with 466 additions and 310 deletions

View file

@ -38,105 +38,7 @@ DIB_24BPP_GetPixel(SURFOBJ *SurfObj, LONG x, LONG y)
return *(PUSHORT)(addr) + (*(addr + 2) << 16);
}
/*
 * Fill the pixels x1 .. x2-1 of scanline y on a 24bpp surface with colour c.
 *
 * Each pixel is stored as a 16-bit write of the low word of c followed by an
 * 8-bit write of bits 16..23. The pixel count is the unsigned difference
 * x2 - x1, so a reversed range (x2 < x1) wraps to a huge count; callers are
 * expected to pass x1 <= x2.
 *
 * Runs of 8 or more pixels are filled in three phases: single pixels until
 * the write address is 4-byte aligned, then groups of 4 pixels written as
 * three 32-bit stores, then the 0..3 pixels that are left over.
 */
VOID
DIB_24BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c)
{
  /* (x1 << 1) + x1 == x1 * 3: byte offset of pixel x1 inside the scanline */
  PBYTE addr = (PBYTE)SurfObj->pvScan0 + y * SurfObj->lDelta + (x1 << 1) + x1;
  ULONG Count = x2 - x1;
#if !defined(_M_IX86) || defined(_MSC_VER)
  ULONG MultiCount;
  ULONG Fill[3];
#endif

  if (Count < 8)
    {
      /* For small fills, don't bother doing anything fancy */
      while (Count--)
        {
          *(PUSHORT)(addr) = c;
          addr += 2;
          *(addr) = c >> 16;
          addr += 1;
        }
    }
  else
    {
      /* Align to 4-byte address. Each pixel advances addr by 3 bytes, so this
       * takes at most 3 iterations and leaves at least 5 pixels to draw. */
      while (0 != ((ULONG_PTR) addr & 0x3))
        {
          *(PUSHORT)(addr) = c;
          addr += 2;
          *(addr) = c >> 16;
          addr += 1;
          Count--;
        }
      /* If the color we need to fill with is 0ABC, then the final mem pattern
       * (note little-endianness) would be:
       *
       * |C.B.A|C.B.A|C.B.A|C.B.A| <- pixel borders
       * |C.B.A.C|B.A.C.B|A.C.B.A| <- ULONG borders
       *
       * So, taking endianness into account again, we need to fill with these
       * ULONGs: CABC BCAB ABCA */
#if defined(_M_IX86) && !defined(_MSC_VER)
      /* This is about 30% faster than the generic C code below.
       * The asm stores directly into the surface through edi, which the
       * operand list cannot express, so "memory" is listed as a clobber to
       * stop the compiler from caching or reordering memory accesses across
       * the statement. "cc" documents that the flags are modified. */
      __asm__ __volatile__ (
"        movl %1, %%ecx\n"
"        andl $0xffffff, %%ecx\n" /* 0ABC */
"        movl %%ecx, %%ebx\n" /* Construct BCAB in ebx */
"        shrl $8, %%ebx\n"
"        movl %%ecx, %%eax\n"
"        shll $16, %%eax\n"
"        orl %%eax, %%ebx\n"
"        movl %%ecx, %%edx\n" /* Construct ABCA in edx */
"        shll $8, %%edx\n"
"        movl %%ecx, %%eax\n"
"        shrl $16, %%eax\n"
"        orl %%eax, %%edx\n"
"        movl %%ecx, %%eax\n" /* Construct CABC in eax */
"        shll $24, %%eax\n"
"        orl %%ecx, %%eax\n"
"        movl %2, %%ecx\n" /* Load count */
"        shr $2, %%ecx\n"
"        movl %3, %%edi\n" /* Load dest */
"0:\n"
"        movl %%eax, (%%edi)\n" /* Store 4 pixels, 12 bytes */
"        movl %%ebx, 4(%%edi)\n"
"        movl %%edx, 8(%%edi)\n"
"        addl $12, %%edi\n"
"        dec %%ecx\n"
"        jnz 0b\n"
"        movl %%edi, %0\n"
      : "=m"(addr)
      : "m"(c), "m"(Count), "m"(addr)
      : "%eax", "%ebx", "%ecx", "%edx", "%edi", "memory", "cc");
#else
      c = c & 0xffffff; /* 0ABC */
      Fill[0] = c | (c << 24); /* CABC */
      Fill[1] = (c >> 8) | (c << 16); /* BCAB */
      Fill[2] = (c << 8) | (c >> 16); /* ABCA */
      /* At least 5 pixels remain here, so MultiCount >= 1 and the do/while
       * below never starts with a zero counter. */
      MultiCount = Count / 4;
      do
        {
          *(PULONG)addr = Fill[0];
          addr += 4;
          *(PULONG)addr = Fill[1];
          addr += 4;
          *(PULONG)addr = Fill[2];
          addr += 4;
        }
      while (0 != --MultiCount);
#endif
      /* Pixels that did not fit into a group of four */
      Count = Count & 0x03;
      while (0 != Count--)
        {
          *(PUSHORT)(addr) = c;
          addr += 2;
          *(addr) = c >> 16;
          addr += 1;
        }
    }
}
VOID
DIB_24BPP_VLine(SURFOBJ *SurfObj, LONG x, LONG y1, LONG y2, ULONG c)

View file

@ -0,0 +1,89 @@
/*
* ReactOS W32 Subsystem
* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 ReactOS Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id$ */
#include <w32k.h>
#define NDEBUG
#include <debug.h>
/*
 * Generic C implementation: fill the pixels x1 .. x2-1 of scanline y on a
 * 24bpp surface with colour c.
 *
 * Each pixel is stored as a 16-bit write of the low word of c followed by an
 * 8-bit write of bits 16..23. The pixel count is the unsigned difference
 * x2 - x1, so a reversed range (x2 < x1) wraps to a huge count; callers are
 * expected to pass x1 <= x2.
 *
 * Runs of 8 or more pixels are filled in three phases: single pixels until
 * the write address is 4-byte aligned, then groups of 4 pixels written as
 * three 32-bit stores, then the 0..3 pixels that are left over.
 */
VOID
DIB_24BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c)
{
  /* (x1 << 1) + x1 == x1 * 3: byte offset of pixel x1 inside the scanline */
  PBYTE addr = (PBYTE)SurfObj->pvScan0 + y * SurfObj->lDelta + (x1 << 1) + x1;
  ULONG Count = x2 - x1;
  ULONG MultiCount;   /* number of 4-pixel (12-byte) groups to store */
  ULONG Fill[3];      /* the three ULONG patterns making up one group */

  if (Count < 8)
    {
      /* For small fills, don't bother doing anything fancy */
      while (Count--)
        {
          *(PUSHORT)(addr) = c;
          addr += 2;
          *(addr) = c >> 16;
          addr += 1;
        }
    }
  else
    {
      /* Align to 4-byte address. Each pixel advances addr by 3 bytes, so this
       * takes at most 3 iterations and leaves at least 5 pixels to draw. */
      while (0 != ((ULONG_PTR) addr & 0x3))
        {
          *(PUSHORT)(addr) = c;
          addr += 2;
          *(addr) = c >> 16;
          addr += 1;
          Count--;
        }
      /* If the color we need to fill with is 0ABC, then the final mem pattern
       * (note little-endianness) would be:
       *
       * |C.B.A|C.B.A|C.B.A|C.B.A| <- pixel borders
       * |C.B.A.C|B.A.C.B|A.C.B.A| <- ULONG borders
       *
       * So, taking endianness into account again, we need to fill with these
       * ULONGs: CABC BCAB ABCA */
      c = c & 0xffffff; /* 0ABC */
      Fill[0] = c | (c << 24); /* CABC */
      Fill[1] = (c >> 8) | (c << 16); /* BCAB */
      Fill[2] = (c << 8) | (c >> 16); /* ABCA */
      /* At least 5 pixels remain here, so MultiCount >= 1 and the do/while
       * below never starts with a zero counter. */
      MultiCount = Count / 4;
      do
        {
          *(PULONG)addr = Fill[0];
          addr += 4;
          *(PULONG)addr = Fill[1];
          addr += 4;
          *(PULONG)addr = Fill[2];
          addr += 4;
        }
      while (0 != --MultiCount);
      /* Pixels that did not fit into a group of four */
      Count = Count & 0x03;
      while (0 != Count--)
        {
          *(PUSHORT)(addr) = c;
          addr += 2;
          *(addr) = c >> 16;
          addr += 1;
        }
    }
}

View file

@ -1,51 +1,51 @@
/*
* ReactOS W32 Subsystem
* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 2004, 2005, 2006 ReactOS Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: */
#include <w32k.h>
#define NDEBUG
#include <debug.h>
/*
 * Fill the pixels x1 .. x2-1 of scanline y on a 32bpp surface with colour c.
 * The loop compares signed values, so nothing is written when x1 >= x2.
 */
VOID
DIB_32BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c)
{
  /* Start of scanline y. The arithmetic is done on a byte pointer so that no
   * integer value is implicitly converted to a pointer. */
  PBYTE byteaddr = (PBYTE)SurfObj->pvScan0 + y * SurfObj->lDelta;
  /* One DWORD per pixel: step x1 pixels into the scanline */
  PDWORD addr = (PDWORD)byteaddr + x1;
  LONG cx = x1;

  while (cx < x2)
    {
      *addr = (DWORD)c;
      ++addr;
      ++cx;
    }
}
/*
 * Fill the rectangle DestRect on a 32bpp surface with a solid colour by
 * calling DIB_32BPP_HLine once per scanline, from DestRect->top up to (but
 * not including) DestRect->bottom. Always returns TRUE.
 */
BOOLEAN
DIB_32BPP_ColorFill(SURFOBJ* DestSurface, RECTL* DestRect, ULONG color)
{
/* The row counter is a ULONG, so the comparison against DestRect->bottom is
 * presumably carried out unsigned - NOTE(review): confirm for negative tops. */
ULONG DestY;
for (DestY = DestRect->top; DestY< DestRect->bottom; DestY++)
{
/* left/right are passed straight through as the x1/x2 span of the line */
DIB_32BPP_HLine (DestSurface, DestRect->left, DestRect->right, DestY, color);
}
return TRUE;
/*
* ReactOS W32 Subsystem
* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 2004, 2005, 2006 ReactOS Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: */
#include <w32k.h>
#define NDEBUG
#include <debug.h>
/*
 * Fill the pixels x1 .. x2-1 of scanline y on a 32bpp surface with colour c.
 * The loop compares signed values, so nothing is written when x1 >= x2.
 */
VOID
DIB_32BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c)
{
  /* Start of scanline y. The arithmetic is done on a byte pointer so that no
   * integer value is implicitly converted to a pointer. */
  PBYTE byteaddr = (PBYTE)SurfObj->pvScan0 + y * SurfObj->lDelta;
  /* One DWORD per pixel: step x1 pixels into the scanline */
  PDWORD addr = (PDWORD)byteaddr + x1;
  LONG cx = x1;

  while (cx < x2)
    {
      *addr = (DWORD)c;
      ++addr;
      ++cx;
    }
}
/*
 * Paint DestRect on a 32bpp surface with a single colour.
 *
 * Every scanline from DestRect->top up to (not including) DestRect->bottom
 * is drawn with one DIB_32BPP_HLine call spanning DestRect->left to
 * DestRect->right. The function always reports success.
 */
BOOLEAN
DIB_32BPP_ColorFill(SURFOBJ* DestSurface, RECTL* DestRect, ULONG color)
{
  ULONG Row = DestRect->top;

  while (Row < DestRect->bottom)
    {
      DIB_32BPP_HLine (DestSurface, DestRect->left, DestRect->right, Row, color);
      Row++;
    }

  return TRUE;
}

View file

@ -0,0 +1,165 @@
/*
* ReactOS W32 Subsystem
* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 2004, 2005, 2006 ReactOS Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: */
/*
 * VOID DIB_24BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c)
 *
 * i386 implementation (GNU as, Intel syntax) of the 24bpp horizontal line
 * fill: writes x2 - x1 pixels of colour c starting at pixel x1 of scanline y.
 *
 * Arguments are read from the stack. After the three pushes and the 24-byte
 * frame set up below they are found at:
 *   [esp+40] SurfObj   [esp+44] x1   [esp+48] x2   [esp+52] y   [esp+56] c
 * ([esp+58] is therefore the upper 16 bits of c.)
 * Frame slots:
 *   [esp]   pixel counter for the loop currently running
 *   [esp+4] current write address
 * edi, esi and ebx are pushed on entry and popped before every ret.
 */
.globl _DIB_24BPP_HLine
.intel_syntax noprefix
.def _DIB_24BPP_HLine;
.scl 2;
.type 32;
.endef
_DIB_24BPP_HLine:
push edi
push esi
push ebx
sub esp, 24
mov ebx, [esp+40] /* ebx = SurfObj */
mov edi, [esp+52] /* edi = y */
mov ecx, [esp+44] /* ecx = x1 */
/* Offsets 36 and 32 play the roles of lDelta and pvScan0 in the C version -
 * NOTE(review): confirm against the SURFOBJ layout. */
mov eax, [ebx+36]
mov esi, [ebx+32]
mov edx, [esp+48] /* edx = x2 */
imul eax, edi /* eax = y * stride */
sub edx, ecx /* edx = Count = x2 - x1 */
mov [esp], edx
add eax, esi
lea eax, [eax+ecx*2]
add eax, ecx /* eax = base + y * stride + x1 * 3 */
cmp edx, 7
mov esi, edx /* esi keeps Count for the final "& 3"; mov leaves the flags alone */
mov [esp+4], eax
ja Align4byte /* unsigned Count >= 8: take the aligned fast path */
/* Count < 8: store Count - 1 and leave immediately if Count was 0 */
lea eax, [edx-1]
mov [esp], eax
inc eax
jnz small_fill
add esp, 24
pop ebx
pop esi
pop edi
ret
/* For small fills, don't bother doing anything fancy */
/* One pixel per iteration; on entry [esp] holds (pixels still to draw) - 1. */
small_fill:
movzx ecx, word ptr [esp+58] /* ecx = c >> 16 */
mov edx, [esp+4] /* edx = address of this pixel */
mov esi, [esp+56] /* esi = c */
lea eax, [edx+2]
mov [esp+4], eax
mov [edx+2], cl /* third byte of the pixel */
mov eax, [esp]
inc dword ptr [esp+4] /* write address now points at the next pixel (+3) */
mov [edx], si /* low two bytes of the pixel */
dec eax
mov [esp], eax
inc eax /* zero when the pixel just written was the last one */
jnz small_fill
add esp, 24
pop ebx
pop esi
pop edi
ret
Align4byte:
/* Align to 4-byte address */
test al, 3 /* eax still holds the start address */
mov ecx, eax
jz loop1 /* already aligned: esi, [esp] and [esp+4] are all set up */
/* The next two instructions change no register; presumably alignment padding. */
lea esi, [esi+0]
lea edi, [edi+0]
loopasmversion:
/* Write single pixels until the write address is a multiple of 4.
 * ecx = address of the pixel to write, [esp] = pixels still to draw. */
movzx edx, word ptr [esp+58] /* edx = c >> 16 */
lea edi, [ecx+2]
mov eax, [esp+56] /* eax = c */
mov [esp+4], edi
mov [ecx+2], dl /* third byte of the pixel */
mov ebx, [esp+4]
mov [ecx], ax /* low two bytes of the pixel */
mov edx, [esp]
inc ebx /* ebx = address of the next pixel */
mov [esp+4], ebx
dec edx /* one pixel fewer to draw */
test bl, 3 /* flags survive the two movs below and feed the jnz */
mov [esp], edx
mov ecx, ebx
jnz loopasmversion
mov esi, edx /* esi = pixels left after alignment */
loop1:
/* Build the three dwords that encode four consecutive pixels of colour 0ABC
 * (see the CABC / BCAB / ABCA explanation in the C version). The inline-asm
 * version this was derived from was noted as about 30% faster than the
 * generic C loop; that figure has not been re-measured here. */
mov ecx, [esp+56]
and ecx, 16777215 /* 0x00ffffff: ecx = 0ABC */
mov ebx, ecx /* Construct BCAB in ebx */
shr ebx, 8
mov eax, ecx
shl eax, 16
or ebx, eax
mov edx, ecx /* Construct ABCA in edx */
shl edx, 8
mov eax, ecx
shr eax, 16
or edx, eax
mov eax, ecx /* Construct CABC in eax */
shl eax, 24
or eax, ecx
mov ecx, [esp] /* pixels remaining */
shr ecx, 2 /* ecx = number of 4-pixel groups */
mov edi, [esp+4] /* edi = aligned write address */
loop2:
/* Store 4 pixels, 12 bytes, per iteration */
mov [edi], eax
mov [edi+4], ebx
mov [edi+8], edx
add edi, 12
dec ecx
jnz loop2
mov [esp+4], edi
and esi, 3 /* pixels that did not fit into a group of four */
lea eax, [esi-1]
mov [esp], eax
inc eax
jnz leftoverfromthemainloop
add esp, 24
pop ebx
pop esi
pop edi
ret
leftoverfromthemainloop:
/* Count = Count & 0x03; */
/* One pixel per iteration; on entry [esp] holds (pixels still to draw) - 1. */
mov ecx, [esp+4] /* ecx = address of this pixel */
mov ebx, [esp+56] /* ebx = c */
lea esi, [ecx+2]
mov [ecx], bx /* low two bytes of the pixel */
shr ebx, 16
mov [esp+4], esi
mov [ecx+2], bl /* third byte of the pixel */
mov eax, [esp]
inc dword ptr [esp+4] /* write address now points at the next pixel (+3) */
dec eax
mov [esp], eax
inc eax /* zero when the pixel just written was the last one */
jnz leftoverfromthemainloop
add esp, 24
pop ebx
pop esi
pop edi
ret

View file

@ -1,93 +1,93 @@
/*
* ReactOS W32 Subsystem
* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 2004, 2005, 2006 ReactOS Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: */
/*
 * BOOLEAN DIB_32BPP_ColorFill(SURFOBJ* DestSurface, RECTL* DestRect, ULONG color)
 *
 * i386 implementation (GNU as, Intel syntax). Fills every scanline from
 * DestRect->top up to (not including) DestRect->bottom with `color`, from
 * pixel DestRect->left up to (not including) DestRect->right, and returns 1.
 * Nothing is drawn when right <= left (signed) or bottom <= top (unsigned),
 * which matches the C implementation built on DIB_32BPP_HLine.
 *
 * Stack after "sub esp, 24":
 *   [esp+28] DestSurface   [esp+32] DestRect   [esp+36] color
 *   [esp]    scanlines still to fill
 *   [esp+4]  address of the first pixel of the current scanline
 *   [esp+8] / [esp+12] / [esp+16] / [esp+20]  saved ebx / esi / edi / ebp
 * Register roles inside the loop: ebp = color, esi = pixels per scanline,
 * edx = byte distance between scanlines.
 *
 * Offsets 32 and 36 into DestSurface play the roles of pvScan0 and lDelta in
 * the C version - NOTE(review): confirm against the SURFOBJ layout.
 */
.globl _DIB_32BPP_ColorFill
.intel_syntax noprefix
.def _DIB_32BPP_ColorFill;
.scl 2;
.type 32;
.endef
_DIB_32BPP_ColorFill:
        sub esp, 24
        mov ecx, [esp+32]               /* ecx = DestRect */
        mov [esp+8], ebx
        mov ebx, [esp+28]               /* ebx = DestSurface */
        mov [esp+20], ebp
        mov ebp, [esp+36]               /* ebp = color */
        mov [esp+12], esi
        mov [esp+16], edi
        mov edi, [ecx]                  /* edi = left */
        mov esi, [ecx+8]                /* esi = right */
        mov edx, [ebx+36]               /* edx = scanline stride */
        /* An empty or reversed span would hand rep stosd a zero or wrapped
         * count; all callee-saved registers are already stored, so leave. */
        cmp esi, edi
        jle end
        sub esi, edi                    /* esi = width in pixels, >= 1 */
        mov edi, [ecx+4]                /* edi = top */
        mov eax, edi
        imul eax, edx
        add eax, [ebx+32]
        mov ebx, [ecx]
        lea eax, [eax+ebx*4]            /* eax = base + top * stride + left * 4 */
        mov [esp+4], eax
        mov eax, [ecx+12]               /* eax = bottom */
        cmp eax, edi
        jbe end                         /* bottom <= top (unsigned): no rows */
        sub eax, edi
        mov [esp], eax                  /* row counter = bottom - top */
        lea esi, [esi+0]                /* changes nothing; presumably padding */
for_loop:
        mov eax, ebp                    /* reload the colour: the unaligned path rotates eax */
        cld
        mov ebx, esi
        mov edi, [esp+4]
        test edi, 3
        jnz algin_draw
        mov ecx, esi
        rep stosd                       /* aligned row: one dword per pixel */
        add [esp+4], edx                /* advance to the next scanline */
        dec dword ptr [esp]
        jnz for_loop
end:
        mov ebx, [esp+8]
        mov eax, 1                      /* return TRUE */
        mov esi, [esp+12]
        mov edi, [esp+16]
        mov ebp, [esp+20]
        add esp, 24
        ret
algin_draw:
        /* Row does not start on a 4-byte boundary. Write the low word of the
         * first pixel, then width-1 dwords that each hold the high word of one
         * pixel and the low word of the next, then the high word of the last
         * pixel. The bytes in memory are the same as width plain dword stores;
         * when the start address is off by 2 the dword stores become aligned. */
        stosw                           /* low word of pixel 0 */
        ror eax, 16                     /* eax = low:high, so ax = high word */
        dec ebx
        mov ecx, ebx                    /* width - 1 (width >= 1 is guaranteed above) */
        rep stosd
        stosw                           /* high word of the last pixel */
        add [esp+4], edx                /* advance to the next scanline */
        dec dword ptr [esp]
        jnz for_loop
        mov ebx, [esp+8]
        mov eax, 1                      /* return TRUE */
        mov esi, [esp+12]
        mov edi, [esp+16]
        mov ebp, [esp+20]
        add esp, 24
        ret
/*
* ReactOS W32 Subsystem
* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 2004, 2005, 2006 ReactOS Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: */
/*
 * BOOLEAN DIB_32BPP_ColorFill(SURFOBJ* DestSurface, RECTL* DestRect, ULONG color)
 *
 * i386 implementation (GNU as, Intel syntax). Fills every scanline from
 * DestRect->top up to (not including) DestRect->bottom with `color`, from
 * pixel DestRect->left up to (not including) DestRect->right, and returns 1.
 * Nothing is drawn when right <= left (signed) or bottom <= top (unsigned),
 * which matches the C implementation built on DIB_32BPP_HLine.
 *
 * Stack after "sub esp, 24":
 *   [esp+28] DestSurface   [esp+32] DestRect   [esp+36] color
 *   [esp]    scanlines still to fill
 *   [esp+4]  address of the first pixel of the current scanline
 *   [esp+8] / [esp+12] / [esp+16] / [esp+20]  saved ebx / esi / edi / ebp
 * Register roles inside the loop: ebp = color, esi = pixels per scanline,
 * edx = byte distance between scanlines.
 *
 * Offsets 32 and 36 into DestSurface play the roles of pvScan0 and lDelta in
 * the C version - NOTE(review): confirm against the SURFOBJ layout.
 */
.globl _DIB_32BPP_ColorFill
.intel_syntax noprefix
.def _DIB_32BPP_ColorFill;
.scl 2;
.type 32;
.endef
_DIB_32BPP_ColorFill:
        sub esp, 24
        mov ecx, [esp+32]               /* ecx = DestRect */
        mov [esp+8], ebx
        mov ebx, [esp+28]               /* ebx = DestSurface */
        mov [esp+20], ebp
        mov ebp, [esp+36]               /* ebp = color */
        mov [esp+12], esi
        mov [esp+16], edi
        mov edi, [ecx]                  /* edi = left */
        mov esi, [ecx+8]                /* esi = right */
        mov edx, [ebx+36]               /* edx = scanline stride */
        /* An empty or reversed span would hand rep stosd a zero or wrapped
         * count; all callee-saved registers are already stored, so leave. */
        cmp esi, edi
        jle end
        sub esi, edi                    /* esi = width in pixels, >= 1 */
        mov edi, [ecx+4]                /* edi = top */
        mov eax, edi
        imul eax, edx
        add eax, [ebx+32]
        mov ebx, [ecx]
        lea eax, [eax+ebx*4]            /* eax = base + top * stride + left * 4 */
        mov [esp+4], eax
        mov eax, [ecx+12]               /* eax = bottom */
        cmp eax, edi
        jbe end                         /* bottom <= top (unsigned): no rows */
        sub eax, edi
        mov [esp], eax                  /* row counter = bottom - top */
        lea esi, [esi+0]                /* changes nothing; presumably padding */
for_loop:
        mov eax, ebp                    /* reload the colour: the unaligned path rotates eax */
        cld
        mov ebx, esi
        mov edi, [esp+4]
        test edi, 3
        jnz algin_draw
        mov ecx, esi
        rep stosd                       /* aligned row: one dword per pixel */
        add [esp+4], edx                /* advance to the next scanline */
        dec dword ptr [esp]
        jnz for_loop
end:
        mov ebx, [esp+8]
        mov eax, 1                      /* return TRUE */
        mov esi, [esp+12]
        mov edi, [esp+16]
        mov ebp, [esp+20]
        add esp, 24
        ret
algin_draw:
        /* Row does not start on a 4-byte boundary. Write the low word of the
         * first pixel, then width-1 dwords that each hold the high word of one
         * pixel and the low word of the next, then the high word of the last
         * pixel. The bytes in memory are the same as width plain dword stores;
         * when the start address is off by 2 the dword stores become aligned. */
        stosw                           /* low word of pixel 0 */
        ror eax, 16                     /* eax = low:high, so ax = high word */
        dec ebx
        mov ecx, ebx                    /* width - 1 (width >= 1 is guaranteed above) */
        rep stosd
        stosw                           /* high word of the last pixel */
        add [esp+4], edx                /* advance to the next scanline */
        dec dword ptr [esp]
        jnz for_loop
        mov ebx, [esp+8]
        mov eax, 1                      /* return TRUE */
        mov esi, [esp+12]
        mov edi, [esp+16]
        mov ebp, [esp+20]
        add esp, 24
        ret

View file

@ -1,69 +1,69 @@
/*
* ReactOS W32 Subsystem
* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 2004, 2005, 2006 ReactOS Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: */
/*
 * VOID DIB_32BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c)
 *
 * i386 implementation (GNU as, Intel syntax). Fills the pixels x1 .. x2-1 of
 * scanline y with colour c, one dword per pixel. Nothing is written when
 * x2 <= x1 (signed), which matches the C loop "while (cx < x2)".
 *
 * Stack after "sub esp, 12":
 *   [esp+16] SurfObj   [esp+20] x1   [esp+24] x2   [esp+28] y   [esp+32] c
 *   [esp]    address of the first pixel to write
 *   [esp+4] / [esp+8]  saved ebx / edi
 *
 * Offsets 32 and 36 into SurfObj play the roles of pvScan0 and lDelta in the
 * C version - NOTE(review): confirm against the SURFOBJ layout.
 */
.globl _DIB_32BPP_HLine
.intel_syntax noprefix
.def _DIB_32BPP_HLine;
.scl 2;
.type 32;
.endef
_DIB_32BPP_HLine:
        sub esp, 12                     // remember: the offsets are decimal, not hex
        mov ecx, [esp+16]               // ecx = SurfObj
        mov [esp+4], ebx
        mov edx, [esp+20]               // edx = LONG x1
        mov [esp+8], edi
        mov edi, [esp+28]               // edi = LONG y
        mov eax, [ecx+36]
        mov ebx, [esp+24]               // ebx = LONG x2
        imul eax, edi                   // eax = y * stride
        mov edi, [ecx+32]
        // An empty or reversed span would hand rep stosd a zero or wrapped
        // count; ebx and edi are already saved, so go straight to the exit.
        cmp ebx, edx
        jle _hline_exit
        sub ebx, edx                    // ebx = pixel count (x2 - x1), >= 1
        add eax, edi
        lea edx, [eax+edx*4]            // edx = base + y * stride + x1 * 4
        mov [esp], edx
        cld
        mov eax, [esp+32]               // eax = colour
        mov edi, [esp]
        test edi, 3                     // Is the start on a fullword boundary?
        jnz short _save_rest
        mov ecx, ebx                    // Setup count of fullwords to fill
        rep stosd
_hline_exit:
        mov ebx, [esp+4]
        mov edi, [esp+8]
        add esp, 12
        ret
_save_rest:
        // Start is not on a 4-byte boundary. Write the low word of the first
        // pixel, then count-1 dwords that each hold the high word of one
        // pixel and the low word of the next, then the high word of the last
        // pixel. The bytes in memory equal count plain dword stores.
        stosw                           // low word of pixel 0
        ror eax, 16                     // eax = low:high, so ax = high word
        mov ecx, ebx                    // Setup count of fullwords to fill
        dec ecx
        rep stosd                       // The actual fill
        stosw                           // high word of the last pixel (ax is untouched by rep stosd)
        mov ebx, [esp+4]
        mov edi, [esp+8]
        add esp, 12
        ret
/*
* ReactOS W32 Subsystem
* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 2004, 2005, 2006 ReactOS Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: */
/*
 * VOID DIB_32BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c)
 *
 * i386 implementation (GNU as, Intel syntax). Fills the pixels x1 .. x2-1 of
 * scanline y with colour c, one dword per pixel. Nothing is written when
 * x2 <= x1 (signed), which matches the C loop "while (cx < x2)".
 *
 * Stack after "sub esp, 12":
 *   [esp+16] SurfObj   [esp+20] x1   [esp+24] x2   [esp+28] y   [esp+32] c
 *   [esp]    address of the first pixel to write
 *   [esp+4] / [esp+8]  saved ebx / edi
 *
 * Offsets 32 and 36 into SurfObj play the roles of pvScan0 and lDelta in the
 * C version - NOTE(review): confirm against the SURFOBJ layout.
 */
.globl _DIB_32BPP_HLine
.intel_syntax noprefix
.def _DIB_32BPP_HLine;
.scl 2;
.type 32;
.endef
_DIB_32BPP_HLine:
        sub esp, 12                     // remember: the offsets are decimal, not hex
        mov ecx, [esp+16]               // ecx = SurfObj
        mov [esp+4], ebx
        mov edx, [esp+20]               // edx = LONG x1
        mov [esp+8], edi
        mov edi, [esp+28]               // edi = LONG y
        mov eax, [ecx+36]
        mov ebx, [esp+24]               // ebx = LONG x2
        imul eax, edi                   // eax = y * stride
        mov edi, [ecx+32]
        // An empty or reversed span would hand rep stosd a zero or wrapped
        // count; ebx and edi are already saved, so go straight to the exit.
        cmp ebx, edx
        jle _hline_exit
        sub ebx, edx                    // ebx = pixel count (x2 - x1), >= 1
        add eax, edi
        lea edx, [eax+edx*4]            // edx = base + y * stride + x1 * 4
        mov [esp], edx
        cld
        mov eax, [esp+32]               // eax = colour
        mov edi, [esp]
        test edi, 3                     // Is the start on a fullword boundary?
        jnz short _save_rest
        mov ecx, ebx                    // Setup count of fullwords to fill
        rep stosd
_hline_exit:
        mov ebx, [esp+4]
        mov edi, [esp+8]
        add esp, 12
        ret
_save_rest:
        // Start is not on a 4-byte boundary. Write the low word of the first
        // pixel, then count-1 dwords that each hold the high word of one
        // pixel and the low word of the next, then the high word of the last
        // pixel. The bytes in memory equal count plain dword stores.
        stosw                           // low word of pixel 0
        ror eax, 16                     // eax = low:high, so ax = high word
        mov ecx, ebx                    // Setup count of fullwords to fill
        dec ecx
        rep stosd                       // The actual fill
        stosw                           // high word of the last pixel (ax is untouched by rep stosd)
        mov ebx, [esp+4]
        mov edi, [esp+8]
        add esp, 12
        ret