reactos/win32ss/gdi/dib/i386/dib24bpp_hline.s

/*
 * PROJECT:         Win32 subsystem
 * LICENSE:         See COPYING in the top level directory
 * FILE:            win32ss/gdi/dib/i386/dib24bpp_hline.s
 * PURPOSE:         ASM optimised 24bpp HLine
 * PROGRAMMERS:     Magnus Olsen
 */

#include <asm.inc>

.code

PUBLIC _DIB_24BPP_HLine

      /*
       * _DIB_24BPP_HLine - fill a horizontal run of 24bpp pixels with one colour.
       *
       * Presumed C prototype (declared outside this file):
       *   VOID DIB_24BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c)
       *
       * Syntax:  Intel, 32-bit x86, run through the C preprocessor.
       * ABI:     stack arguments, plain "ret" (the caller removes the arguments).
       * In:      SurfObj - structure holding the bitmap base pointer and the
       *                    byte stride between rows (offsets defined below)
       *          x1, x2  - first pixel and one-past-last pixel of the run
       *          y       - row index
       *          c       - colour; only bits 0..23 are written
       * Out:     nothing
       * Clobbers: eax, ecx, edx, flags.  ebx, esi and edi are saved and restored.
       *
       * Pixels x1 .. x2-1 of row y are written, three bytes each.  When
       * x2 <= x1 (signed) nothing is written and SurfObj is not dereferenced.
       *
       * Runs of up to HLINE24_SMALL_MAX pixels are written one pixel at a time.
       * Longer runs are written in three phases:
       *   1. single pixels until the destination is 4-byte aligned (0..3 pixels),
       *   2. groups of four pixels as three aligned dwords,
       *   3. the remaining 0..3 pixels one at a time.
       *
       * Register roles after the prologue:
       *   edi = destination pointer
       *   esi = pixels still to write
       *   ax  = colour bits 0..15, dl = colour bits 16..23 (single-pixel loops)
       */

/* Stack offsets of the arguments once edi, esi and ebx have been pushed */
#define HLINE24_ARG_SURFOBJ     16
#define HLINE24_ARG_X1          20
#define HLINE24_ARG_X2          24
#define HLINE24_ARG_Y           28
#define HLINE24_ARG_COLOR       32

/* Field offsets inside SurfObj (32-bit SURFOBJ layout: pvScan0, lDelta) */
#define HLINE24_SURF_PVSCAN0    32
#define HLINE24_SURF_LDELTA     36

/* Longest run that is written purely pixel by pixel */
#define HLINE24_SMALL_MAX       7

/* Mask keeping the 24 colour bits of c */
#define HLINE24_COLOR_MASK      16777215

      _DIB_24BPP_HLine:
                         push    edi
                         push    esi
                         push    ebx

                         mov     edx, [esp + HLINE24_ARG_SURFOBJ]
                         mov     ecx, [esp + HLINE24_ARG_X1]
                         mov     esi, [esp + HLINE24_ARG_X2]
                         sub     esi, ecx                       /* esi = x2 - x1 */
                         jle     hline24_done                   /* empty or reversed run: draw nothing */

                         /* edi = pvScan0 + y * lDelta + x1 * 3 */
                         mov     edi, [esp + HLINE24_ARG_Y]
                         imul    edi, dword ptr [edx + HLINE24_SURF_LDELTA]
                         add     edi, dword ptr [edx + HLINE24_SURF_PVSCAN0]
                         lea     edi, [edi + ecx*2]
                         add     edi, ecx

                         mov     eax, [esp + HLINE24_ARG_COLOR]
                         mov     edx, eax
                         shr     edx, 16                        /* dl = colour bits 16..23 */

                         cmp     esi, HLINE24_SMALL_MAX
                         ja      hline24_align

                         /*
                          * Single-pixel loop, shared by small fills and by the
                          * leftover pixels of a long fill.  Invariant: esi > 0 on entry.
                          */
      hline24_pixel_loop:
                         mov     [edi], ax
                         mov     [edi + 2], dl
                         add     edi, 3
                         dec     esi
                         jnz     hline24_pixel_loop

      hline24_done:
                         pop     ebx
                         pop     esi
                         pop     edi
                         ret

                         /*
                          * Long fill, phase 1: write single pixels until edi is
                          * 4-byte aligned.  Each pixel advances edi by 3, so this
                          * takes at most three pixels; esi is at least 8 on entry
                          * and therefore at least 5 when the loop ends.
                          */
      hline24_align:
                         test    edi, 3
                         jz      hline24_build_pattern
                         mov     [edi], ax
                         mov     [edi + 2], dl
                         add     edi, 3
                         dec     esi
                         jmp     hline24_align

                         /*
                          * Phase 2: four pixels occupy exactly three dwords.  With
                          * the colour written as 0ABC (A = bits 16..23, C = bits 0..7)
                          * memory has to read C B A C | B A C B | A C B A, which as
                          * little-endian dwords is CABC, BCAB, ABCA.
                          */
      hline24_build_pattern:
                         and     eax, HLINE24_COLOR_MASK        /* eax = 0ABC */

                         mov     ebx, eax
                         shr     ebx, 8                         /* 00AB */
                         mov     ecx, eax
                         shl     ecx, 16                        /* BC00 */
                         or      ebx, ecx                       /* ebx = BCAB (second dword) */

                         mov     edx, eax
                         shl     edx, 8                         /* ABC0 */
                         mov     ecx, eax
                         shr     ecx, 16                        /* 000A */
                         or      edx, ecx                       /* edx = ABCA (third dword) */

                         mov     ecx, eax
                         shl     ecx, 24                        /* C000 */
                         or      eax, ecx                       /* eax = CABC (first dword) */

                         mov     ecx, esi
                         shr     ecx, 2                         /* ecx = groups of four pixels, >= 1 */

      hline24_quad_loop:
                         mov     [edi], eax
                         mov     [edi + 4], ebx
                         mov     [edi + 8], edx
                         add     edi, 12
                         dec     ecx
                         jnz     hline24_quad_loop

                         /* Phase 3: Count = Count & 0x03, finish pixel by pixel */
                         and     esi, 3
                         jz      hline24_done

                         mov     eax, [esp + HLINE24_ARG_COLOR]
                         mov     edx, eax
                         shr     edx, 16                        /* dl = colour bits 16..23 */
                         jmp     hline24_pixel_loop

END
Cleanup in win32k/dib: - Replace old headers with new ones, programmers added according to svn blame info - Apply consistent 2pt indentation because of the many indentation levels in this code; getting rid of a tab and 2/3/4pt mixture - Rename universal StretchBlt file to reflect content svn path=/trunk/; revision=42339 2009-08-02 15:49:17 +00:00			`/*`
			`* PROJECT: Win32 subsystem`
			`* LICENSE: See COPYING in the top level directory`
[WIN32SS] Improve the FILE header section. Brought to you by Adam Stachowicz. CORE-10114 svn path=/trunk/; revision=69871 2015-11-10 17:41:55 +00:00			`* FILE: win32ss/gdi/dib/i386/dib24bpp_hline.s`
Cleanup in win32k/dib: - Replace old headers with new ones, programmers added according to svn blame info - Apply consistent 2pt indentation because of the many indentation levels in this code; getting rid of a tab and 2/3/4pt mixture - Rename universal StretchBlt file to reflect content svn path=/trunk/; revision=42339 2009-08-02 15:49:17 +00:00			`* PURPOSE: ASM optimised 24bpp HLine`
			`* PROGRAMMERS: Magnus Olsen`
For 24bpp: 1. Remove the inline asm for HLine and implement an asm version of it in Intel syntax (this version could be optimized further, but it will do for now). 2. Set the eol-style on a file I forgot. svn path=/trunk/; revision=23732 2006-08-26 23:26:39 +00:00			`*/`

[ASM] Merge asm related changes from cmake branch. svn path=/trunk/; revision=49826 2010-11-27 22:12:15 +00:00			`#include <asm.inc>`

			`.code`

			`PUBLIC _DIB_24BPP_HLine`
For 24bpp: 1. Remove the inline asm for HLine and implement an asm version of it in Intel syntax (this version could be optimized further, but it will do for now). 2. Set the eol-style on a file I forgot. svn path=/trunk/; revision=23732 2006-08-26 23:26:39 +00:00
			`_DIB_24BPP_HLine:`
			`push edi`
			`push esi`
			`push ebx`
			`sub esp, 24`
			`mov ebx, [esp+40]`
			`mov edi, [esp+52]`
			`mov ecx, [esp+44]`
			`mov eax, [ebx+36]`
			`mov esi, [ebx+32]`
			`mov edx, [esp+48]`
			`imul eax, edi`
			`sub edx, ecx`
			`mov [esp], edx`
			`add eax, esi`
			`lea eax, [eax+ecx*2]`
			`add eax, ecx`
			`cmp edx, 7`
			`mov esi, edx`
			`mov [esp+4], eax`
			`ja Align4byte`
Cleanup in win32k/dib: - Replace old headers with new ones, programmers added according to svn blame info - Apply consistent 2pt indentation because of the many indentation levels in this code; getting rid of a tab and 2/3/4pt mixture - Rename universal StretchBlt file to reflect content svn path=/trunk/; revision=42339 2009-08-02 15:49:17 +00:00			`lea eax, [edx-1]`
For 24bpp: 1. Remove the inline asm for HLine and implement an asm version of it in Intel syntax (this version could be optimized further, but it will do for now). 2. Set the eol-style on a file I forgot. svn path=/trunk/; revision=23732 2006-08-26 23:26:39 +00:00			`mov [esp], eax`
			`inc eax`
			`jnz small_fill`
			`add esp, 24`
			`pop ebx`
			`pop esi`
			`pop edi`
			`ret`
Cleanup in win32k/dib: - Replace old headers with new ones, programmers added according to svn blame info - Apply consistent 2pt indentation because of the many indentation levels in this code; getting rid of a tab and 2/3/4pt mixture - Rename universal StretchBlt file to reflect content svn path=/trunk/; revision=42339 2009-08-02 15:49:17 +00:00
For 24bpp: 1. Remove the inline asm for HLine and implement an asm version of it in Intel syntax (this version could be optimized further, but it will do for now). 2. Set the eol-style on a file I forgot. svn path=/trunk/; revision=23732 2006-08-26 23:26:39 +00:00			`/* For small fills, don't bother doing anything fancy */`
			`small_fill:`
			`movzx ecx, word ptr [esp+58]`
			`mov edx, [esp+4]`
			`mov esi, [esp+56]`
			`lea eax, [edx+2]`
			`mov [esp+4], eax`
			`mov [edx+2], cl`
			`mov eax, [esp]`
			`inc dword ptr [esp+4]`
			`mov [edx], si`
			`dec eax`
			`mov [esp], eax`
			`inc eax`
			`jnz small_fill`
			`add esp, 24`
			`pop ebx`
			`pop esi`
			`pop edi`
			`ret`
Cleanup in win32k/dib: - Replace old headers with new ones, programmers added according to svn blame info - Apply consistent 2pt indentation because of the many indentation levels in this code; getting rid of a tab and 2/3/4pt mixture - Rename universal StretchBlt file to reflect content svn path=/trunk/; revision=42339 2009-08-02 15:49:17 +00:00
For 24bpp: 1. Remove the inline asm for HLine and implement an asm version of it in Intel syntax (this version could be optimized further, but it will do for now). 2. Set the eol-style on a file I forgot. svn path=/trunk/; revision=23732 2006-08-26 23:26:39 +00:00			`Align4byte:`
			`/* Align to 4-byte address */`
			`test al, 3`
			`mov ecx, eax`
			`jz loop1`
			`lea esi, [esi+0]`
			`lea edi, [edi+0]`
Cleanup in win32k/dib: - Replace old headers with new ones, programmers added according to svn blame info - Apply consistent 2pt indentation because of the many indentation levels in this code; getting rid of a tab and 2/3/4pt mixture - Rename universal StretchBlt file to reflect content svn path=/trunk/; revision=42339 2009-08-02 15:49:17 +00:00
For 24bpp: 1. Remove the inline asm for HLine and implement an asm version of it in Intel syntax (this version could be optimized further, but it will do for now). 2. Set the eol-style on a file I forgot. svn path=/trunk/; revision=23732 2006-08-26 23:26:39 +00:00			`loopasmversion:`
			`/* This is about 30% faster than the generic C code below */`
			`movzx edx, word ptr [esp+58]`
			`lea edi, [ecx+2]`
			`mov eax, [esp+56]`
			`mov [esp+4], edi`
			`mov [ecx+2], dl`
			`mov ebx, [esp+4]`
			`mov [ecx], ax`
			`mov edx, [esp]`
			`inc ebx`
			`mov [esp+4], ebx`
			`dec edx`
			`test bl, 3`
			`mov [esp], edx`
			`mov ecx, ebx`
			`jnz loopasmversion`
			`mov esi, edx`

			`loop1:`
			`mov ecx, [esp+56]`
			`and ecx, 16777215`
			`mov ebx, ecx`
			`shr ebx, 8`
			`mov eax, ecx`
			`shl eax, 16`
			`or ebx, eax`
			`mov edx, ecx`
			`shl edx, 8`
			`mov eax, ecx`
			`shr eax, 16`
			`or edx, eax`
			`mov eax, ecx`
			`shl eax, 24`
			`or eax, ecx`
			`mov ecx, [esp]`
			`shr ecx, 2`
			`mov edi, [esp+4]`
			`loop2:`
			`mov [edi], eax`
			`mov [edi+4], ebx`
			`mov [edi+8], edx`
			`add edi, 12`
			`dec ecx`
			`jnz loop2`
			`mov [esp+4], edi`
			`and esi, 3`
			`lea eax, [esi-1]`
			`mov [esp], eax`
			`inc eax`
			`jnz leftoverfromthemainloop`
			`add esp, 24`
			`pop ebx`
			`pop esi`
			`pop edi`
			`ret`

			`leftoverfromthemainloop:`

			`/* Count = Count & 0x03; */`
			`mov ecx, [esp+4]`
			`mov ebx, [esp+56]`
			`lea esi, [ecx+2]`
			`mov [ecx], bx`
			`shr ebx, 16`
			`mov [esp+4], esi`
			`mov [ecx+2], bl`
			`mov eax, [esp]`
			`inc dword ptr [esp+4]`
			`dec eax`
			`mov [esp], eax`
			`inc eax`
			`jnz leftoverfromthemainloop`
			`add esp, 24`
			`pop ebx`
			`pop esi`
			`pop edi`
			`ret`
[ASM] Merge asm related changes from cmake branch. svn path=/trunk/; revision=49826 2010-11-27 22:12:15 +00:00
			`END`