for 24bpp

1. Remove the inline asm for HLine and implement a standalone asm version of it in Intel syntax. (This version can be optimized further, but it will do for now.) 2. Set the EOL style on a file I forgot. svn path=/trunk/; revision=23732
2025-08-05 08:03:01 +00:00 · 2006-08-26 23:26:39 +00:00 · 2006-08-26 23:26:39 +00:00 · 6518b5361d
commit 6518b5361d
parent c5edc7e6d3
6 changed files with 466 additions and 310 deletions
--- a/reactos/subsystems/win32/win32k/dib/dib24bpp.c
+++ b/reactos/subsystems/win32/win32k/dib/dib24bpp.c
@ -38,105 +38,7 @@ DIB_24BPP_GetPixel(SURFOBJ *SurfObj, LONG x, LONG y)
  return *(PUSHORT)(addr) + (*(addr + 2) << 16);
 }

-VOID
-DIB_24BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c)
-{
-  PBYTE addr = (PBYTE)SurfObj->pvScan0 + y * SurfObj->lDelta + (x1 << 1) + x1;
-  ULONG Count = x2 - x1;
-#if !defined(_M_IX86) || defined(_MSC_VER)
-  ULONG MultiCount;
-  ULONG Fill[3];
-#endif

-  if (Count < 8)
-    {
-      /* For small fills, don't bother doing anything fancy */
-      while (Count--)
-        {
-          *(PUSHORT)(addr) = c;
-          addr += 2;
-          *(addr) = c >> 16;
-          addr += 1;
-        }
-    }
-  else
-    {
-      /* Align to 4-byte address */
-      while (0 != ((ULONG_PTR) addr & 0x3))
-        {
-          *(PUSHORT)(addr) = c;
-          addr += 2;
-          *(addr) = c >> 16;
-          addr += 1;
-          Count--;
-        }
-      /* If the color we need to fill with is 0ABC, then the final mem pattern
-       * (note little-endianness) would be:
-       *
-       * |C.B.A|C.B.A|C.B.A|C.B.A|   <- pixel borders
-       * |C.B.A.C|B.A.C.B|A.C.B.A|   <- ULONG borders
-       *
-       * So, taking endianness into account again, we need to fill with these
-       * ULONGs: CABC BCAB ABCA */
-#if defined(_M_IX86) && !defined(_MSC_VER)
-       /* This is about 30% faster than the generic C code below */
-       __asm__ __volatile__ (
-"      movl %1, %%ecx\n"
-"      andl $0xffffff, %%ecx\n"         /* 0ABC */
-"      movl %%ecx, %%ebx\n"             /* Construct BCAB in ebx */
-"      shrl $8, %%ebx\n"
-"      movl %%ecx, %%eax\n"
-"      shll $16, %%eax\n"
-"      orl  %%eax, %%ebx\n"
-"      movl %%ecx, %%edx\n"             /* Construct ABCA in edx */
-"      shll $8, %%edx\n"
-"      movl %%ecx, %%eax\n"
-"      shrl $16, %%eax\n"
-"      orl  %%eax, %%edx\n"
-"      movl %%ecx, %%eax\n"             /* Construct CABC in eax */
-"      shll $24, %%eax\n"
-"      orl  %%ecx, %%eax\n"
-"      movl %2, %%ecx\n"                /* Load count */
-"      shr  $2, %%ecx\n"
-"      movl %3, %%edi\n"                /* Load dest */
-"0:\n"
-"      movl %%eax, (%%edi)\n"           /* Store 4 pixels, 12 bytes */
-"      movl %%ebx, 4(%%edi)\n"
-"      movl %%edx, 8(%%edi)\n"
-"      addl $12, %%edi\n"
-"      dec  %%ecx\n"
-"      jnz  0b\n"
-"      movl %%edi, %0\n"
-  : "=m"(addr)
-  : "m"(c), "m"(Count), "m"(addr)
-  : "%eax", "%ebx", "%ecx", "%edx", "%edi");
-#else
-      c = c & 0xffffff;                /* 0ABC */
-      Fill[0] = c | (c << 24);         /* CABC */
-      Fill[1] = (c >> 8) | (c << 16);  /* BCAB */
-      Fill[2] = (c << 8) | (c >> 16);  /* ABCA */
-      MultiCount = Count / 4;
-      do
-        {
-          *(PULONG)addr = Fill[0];
-          addr += 4;
-          *(PULONG)addr = Fill[1];
-          addr += 4;
-          *(PULONG)addr = Fill[2];
-          addr += 4;
-        }
-      while (0 != --MultiCount);
-#endif
-      Count = Count & 0x03;
-      while (0 != Count--)
-        {
-          *(PUSHORT)(addr) = c;
-          addr += 2;
-          *(addr) = c >> 16;
-          addr += 1;
-        }
-    }
-}

 VOID
 DIB_24BPP_VLine(SURFOBJ *SurfObj, LONG x, LONG y1, LONG y2, ULONG c)
--- a/reactos/subsystems/win32/win32k/dib/dib24bppc.c
+++ b/reactos/subsystems/win32/win32k/dib/dib24bppc.c
@ -0,0 +1,89 @@
+/*
+ *  ReactOS W32 Subsystem
+ *  Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 ReactOS Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+/* $Id$ */
+
+#include <w32k.h>
+
+#define NDEBUG
+#include <debug.h>
+
+/*
+ * Generic C implementation of the 24bpp horizontal line fill.
+ *
+ * SurfObj - destination surface; pvScan0 and lDelta are used to locate row y
+ * x1      - first column written
+ * x2      - column one past the last one written (x2 - x1 pixels are filled)
+ * y       - row to draw on
+ * c       - colour; only the low 24 bits are stored (low 16 bits first,
+ *           then bits 16-23)
+ *
+ * Runs shorter than 8 pixels are written pixel by pixel. Longer runs are
+ * first advanced to a 4-byte aligned address, then filled 4 pixels
+ * (3 ULONGs, 12 bytes) at a time, and finally the 0-3 remaining pixels are
+ * written individually.
+ */
+VOID
+DIB_24BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c)
+{
+  PBYTE addr = (PBYTE)SurfObj->pvScan0 + y * SurfObj->lDelta + (x1 << 1) + x1;
+  ULONG Count = x2 - x1;
+  ULONG MultiCount;   /* number of 4-pixel groups in the aligned part */
+  ULONG Fill[3];      /* the three ULONGs that make up 4 packed pixels */
+
+  if (Count < 8)
+    {
+      /* For small fills, don't bother doing anything fancy */
+      while (Count--)
+        {
+          *(PUSHORT)(addr) = c;
+          addr += 2;
+          *(addr) = c >> 16;
+          addr += 1;
+        }
+    }
+  else
+    {
+      /* Align to 4-byte address. Each pixel advances addr by 3 bytes, so
+       * this loop runs at most 3 times. */
+      while (0 != ((ULONG_PTR) addr & 0x3))
+        {
+          *(PUSHORT)(addr) = c;
+          addr += 2;
+          *(addr) = c >> 16;
+          addr += 1;
+          Count--;
+        }
+      /* If the color we need to fill with is 0ABC, then the final mem pattern
+       * (note little-endianness) would be:
+       *
+       * |C.B.A|C.B.A|C.B.A|C.B.A|   <- pixel borders
+       * |C.B.A.C|B.A.C.B|A.C.B.A|   <- ULONG borders
+       *
+       * So, taking endianness into account again, we need to fill with these
+       * ULONGs: CABC BCAB ABCA */
+
+      c = c & 0xffffff;                /* 0ABC */
+      Fill[0] = c | (c << 24);         /* CABC */
+      Fill[1] = (c >> 8) | (c << 16);  /* BCAB */
+      Fill[2] = (c << 8) | (c >> 16);  /* ABCA */
+
+      /* Count was >= 8 and at most 3 pixels were consumed above, so
+       * MultiCount is at least 1 and the do/while below is safe. */
+      MultiCount = Count / 4;
+      do
+        {
+          *(PULONG)addr = Fill[0];
+          addr += 4;
+          *(PULONG)addr = Fill[1];
+          addr += 4;
+          *(PULONG)addr = Fill[2];
+          addr += 4;
+        }
+      while (0 != --MultiCount);
+
+      /* Write the 0-3 pixels that did not fit into a group of four */
+      Count = Count & 0x03;
+      while (0 != Count--)
+        {
+          *(PUSHORT)(addr) = c;
+          addr += 2;
+          *(addr) = c >> 16;
+          addr += 1;
+        }
+    }
+}
--- a/reactos/subsystems/win32/win32k/dib/i386/dib24bpp_hline.s
+++ b/reactos/subsystems/win32/win32k/dib/i386/dib24bpp_hline.s
@ -0,0 +1,165 @@
+/* 
+ *  ReactOS W32 Subsystem
+ *  Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 2004, 2005, 2006 ReactOS Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+/* $Id$ */
+
+/*
+ * VOID DIB_24BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c)
+ *
+ * i386 version (GAS, Intel syntax) of the 24bpp horizontal line fill. It
+ * follows the same steps as the C implementation of DIB_24BPP_HLine:
+ *   - fewer than 8 pixels: write them one by one;
+ *   - otherwise: write single pixels until the destination is 4-byte
+ *     aligned, store 4 pixels (12 bytes) per iteration, then write the
+ *     remaining (Count & 3) pixels one by one.
+ *
+ * All arguments are read from the stack. After the three pushes and the
+ * 24-byte local area the layout is:
+ *   [esp+40] SurfObj   [esp+44] x1   [esp+48] x2   [esp+52] y   [esp+56] c
+ *   [esp+58] upper 16 bits of c (only its low byte is stored)
+ * Locals actually used:
+ *   [esp]    pixel counter
+ *   [esp+4]  current destination address
+ *
+ * edi, esi and ebx are saved and restored; eax, ecx and edx are scratch.
+ * The function returns with a plain ret, leaving the arguments on the
+ * stack for the caller.
+ *
+ * NOTE(review): offsets 32 and 36 into SurfObj are taken to be pvScan0 and
+ * lDelta because they are used exactly like those fields in the C version;
+ * confirm against the SURFOBJ definition.
+ */
+.globl _DIB_24BPP_HLine
+.intel_syntax noprefix
+
+.def   _DIB_24BPP_HLine;
+.scl	2;
+.type	32;
+.endef
+      _DIB_24BPP_HLine:
+                         push    edi
+                         push    esi
+                         push    ebx
+                         sub     esp, 24                 /* locals; only [esp] and [esp+4] are used */
+                         mov     ebx, [esp+40]           /* ebx = SurfObj */
+                         mov     edi, [esp+52]           /* edi = y */
+                         mov     ecx, [esp+44]           /* ecx = x1 */
+                         mov     eax, [ebx+36]           /* eax = [SurfObj+36] (lDelta in the C version) */
+                         mov     esi, [ebx+32]           /* esi = [SurfObj+32] (pvScan0 in the C version) */
+                         mov     edx, [esp+48]           /* edx = x2 */
+                         imul    eax, edi                /* eax = y * lDelta */
+                         sub     edx, ecx                /* edx = Count = x2 - x1 */
+                         mov     [esp], edx              /* [esp] = Count */
+                         add     eax, esi
+                         lea     eax, [eax+ecx*2]
+                         add     eax, ecx                /* eax = addr = pvScan0 + y*lDelta + 3*x1 */
+                         cmp     edx, 7
+                         mov     esi, edx                /* esi = Count; mov does not touch the flags from cmp */
+                         mov     [esp+4], eax            /* [esp+4] = addr */
+                         ja      Align4byte              /* Count > 7 (unsigned): use the aligned fill */
+                         lea     eax, [edx-1]                                                  
+                         mov     [esp], eax              /* [esp] = Count - 1 */
+                         inc     eax                     /* sets ZF when Count == 0 */
+                         jnz     small_fill
+                         add     esp, 24                 /* Count == 0: nothing to draw */
+                         pop     ebx
+                         pop     esi
+                         pop     edi
+                         ret
+                    
+                        /* For small fills, don't bother doing anything fancy */
+                        /* Each pass writes one pixel; [esp] holds (pixels left - 1) on entry */
+               small_fill:
+                         movzx   ecx, word ptr [esp+58]  /* ecx = c >> 16 */
+                         mov     edx, [esp+4]            /* edx = addr */
+                         mov     esi, [esp+56]           /* esi = c */
+                         lea     eax, [edx+2]
+                         mov     [esp+4], eax
+                         mov     [edx+2], cl             /* third byte of the pixel */
+                         mov     eax, [esp]
+                         inc     dword ptr [esp+4]       /* addr += 3 in total */
+                         mov     [edx], si               /* first two bytes of the pixel */
+                         dec     eax
+                         mov     [esp], eax
+                         inc     eax                     /* ZF set once the last pixel was written */
+                         jnz     small_fill
+                         add     esp, 24
+                         pop     ebx
+                         pop     esi
+                         pop     edi
+                         ret
+                         
+               Align4byte:
+                         /* Align to 4-byte address */ 
+                         test    al, 3                   /* eax still holds addr */
+                         mov     ecx, eax                /* ecx = addr */
+                         jz      loop1                   /* already aligned */
+                         lea     esi, [esi+0]            /* no-op (presumably padding) */
+                         lea     edi, [edi+0]            /* no-op (presumably padding) */
+                         
+           loopasmversion:
+                        /* Write single pixels until the destination address is a
+                           multiple of 4; ecx = addr, [esp] = Count */
+                         movzx   edx, word ptr [esp+58]  /* edx = c >> 16 */
+                         lea     edi, [ecx+2]
+                         mov     eax, [esp+56]           /* eax = c */
+                         mov     [esp+4], edi
+                         mov     [ecx+2], dl             /* third byte of the pixel */
+                         mov     ebx, [esp+4]
+                         mov     [ecx], ax               /* first two bytes of the pixel */
+                         mov     edx, [esp]
+                         inc     ebx                     /* ebx = addr + 3 */
+                         mov     [esp+4], ebx
+                         dec     edx                     /* Count-- */
+                         test    bl, 3                   /* aligned yet? the movs below keep these flags */
+                         mov     [esp], edx
+                         mov     ecx, ebx
+                         jnz     loopasmversion
+                         mov     esi, edx                /* esi = Count left after alignment */
+
+                   loop1:
+                        /* With c = 0ABC, build the three dwords that hold 4 packed
+                           pixels: eax = CABC, ebx = BCAB, edx = ABCA */
+                         mov     ecx, [esp+56]
+                         and     ecx, 16777215           /* 0x00FFFFFF: ecx = 0ABC */
+                         mov     ebx, ecx
+                         shr     ebx, 8
+                         mov     eax, ecx
+                         shl     eax, 16
+                         or      ebx, eax                /* ebx = BCAB */
+                         mov     edx, ecx
+                         shl     edx, 8
+                         mov     eax, ecx
+                         shr     eax, 16
+                         or      edx, eax                /* edx = ABCA */
+                         mov     eax, ecx
+                         shl     eax, 24
+                         or      eax, ecx                /* eax = CABC */
+                         mov     ecx, [esp]
+                         shr     ecx, 2                  /* ecx = Count / 4 groups of 4 pixels */
+                         mov     edi, [esp+4]            /* edi = aligned destination */
+                    loop2:
+                        /* Store 4 pixels, 12 bytes; the count is tested after the
+                           body, so this runs at least once */
+                         mov     [edi], eax
+                         mov     [edi+4], ebx
+                         mov     [edi+8], edx
+                         add     edi, 12
+                         dec     ecx
+                         jnz     loop2
+                         mov     [esp+4], edi            /* save the updated destination */
+                         and     esi, 3                  /* esi = Count & 3 leftover pixels */
+                         lea     eax, [esi-1]
+                         mov     [esp], eax              /* [esp] = leftover - 1 */
+                         inc     eax                     /* sets ZF when there is no leftover */
+                         jnz     leftoverfromthemainloop
+                         add     esp, 24
+                         pop     ebx
+                         pop     esi
+                         pop     edi
+                         ret
+
+  leftoverfromthemainloop:
+
+                        /* Write the remaining (Count & 3) pixels one at a time;
+                           [esp] holds (pixels left - 1) on entry to each pass */
+                         mov     ecx, [esp+4]            /* ecx = addr */
+                         mov     ebx, [esp+56]           /* ebx = c */
+                         lea     esi, [ecx+2]
+                         mov     [ecx], bx               /* first two bytes of the pixel */
+                         shr     ebx, 16
+                         mov     [esp+4], esi
+                         mov     [ecx+2], bl             /* third byte of the pixel */
+                         mov     eax, [esp]
+                         inc     dword ptr [esp+4]       /* addr += 3 in total */
+                         dec     eax
+                         mov     [esp], eax
+                         inc     eax                     /* ZF set once the last pixel was written */
+                         jnz     leftoverfromthemainloop
+                         add     esp, 24
+                         pop     ebx
+                         pop     esi
+                         pop     edi
+                         ret