implement floor and floorf for amd64 with sse2 instructions

svn path=/trunk/; revision=53887
This commit is contained in:
Timo Kreuzer 2011-09-28 21:32:37 +00:00
parent 6d1eecf3d9
commit 2779f4ccdd
3 changed files with 36 additions and 21 deletions

View file

@ -111,6 +111,7 @@ elseif(ARCH MATCHES amd64)
math/amd64/exp.S math/amd64/exp.S
math/amd64/fabs.S math/amd64/fabs.S
math/amd64/floor.S math/amd64/floor.S
math/amd64/floorf.S
math/amd64/fmod.S math/amd64/fmod.S
math/amd64/ldexp.S math/amd64/ldexp.S
math/amd64/log.S math/amd64/log.S

View file

@ -9,14 +9,33 @@
/* INCLUDES ******************************************************************/ /* INCLUDES ******************************************************************/
#include <asm.inc> #include <asm.inc>
#include <ksamd64.inc>
/* CODE **********************************************************************/ /* CODE **********************************************************************/
.code64 .code64
PUBLIC floor PUBLIC floor
FUNC floor
    .ENDPROLOG

    /*
     * double floor(double x)
     *
     * In:    xmm0 = x
     * Out:   xmm0 = largest integral value that is not greater than x
     * Uses:  rax, rcx, xmm1 and the flags. The stack is not touched, so
     *        no stack space is reserved.
     */

    /* Truncate x towards zero into a 64 bit integer */
    cvttsd2si rcx, xmm0

    /* NaN, infinities and values that do not fit into 64 bits produce the
       "integer indefinite" value 0x8000000000000000. Every such input is
       either NaN or already integral, so x is returned unchanged.
       (x == -2^63 legitimately produces the same value and is integral
       as well, so it takes the same path.) */
    mov rax, HEX(8000000000000000)
    cmp rcx, rax
    je floor_done

    /* Convert the truncated value back, so that it can be compared to x */
    cvtsi2sd xmm1, rcx
    ucomisd xmm0, xmm1

    /* x is already integral: return it as it is. This also keeps the
       sign of -0.0 */
    je floor_done

    /* The carry flag is set when x < trunc(x), which is only the case for
       negative, non-integral x, where truncation rounded up. Subtract the
       carry to round down instead. Subtracting the sign bit unconditionally
       would be off by one for negative integral values like -2.0. */
    sbb rcx, 0

    /* Convert the result back to xmm0 (double precision) */
    cvtsi2sd xmm0, rcx

floor_done:
    ret
ENDFUNC floor
END END

View file

@ -1,7 +1,7 @@
/* /*
* COPYRIGHT: See COPYING in the top level directory * COPYRIGHT: See COPYING in the top level directory
* PROJECT: ReactOS system libraries * PROJECT: ReactOS system libraries
* PURPOSE: Implementation of tan * PURPOSE: Implementation of floorf
* FILE: lib/sdk/crt/math/amd64/floorf.S * FILE: lib/sdk/crt/math/amd64/floorf.S
* PROGRAMMER: Timo Kreuzer (timo.kreuzer@reactos.org) * PROGRAMMER: Timo Kreuzer (timo.kreuzer@reactos.org)
*/ */
@ -9,7 +9,6 @@
/* INCLUDES ******************************************************************/ /* INCLUDES ******************************************************************/
#include <asm.inc> #include <asm.inc>
#include <ksamd64.inc>
/* CODE **********************************************************************/ /* CODE **********************************************************************/
.code64 .code64
@ -19,26 +18,22 @@ FUNC floorf
sub rsp, 16 sub rsp, 16
.ENDPROLOG .ENDPROLOG
/* Put parameter on the stack */ /* Truncate xmm0 to integer (single precision) */
movss dword ptr [rsp], xmm0 cvttss2si rcx, xmm0
fld dword ptr [rsp]
/* Change fpu control word to round down */ /* Duplicate the bits into rax */
fstcw [rsp] movd eax, xmm0
mov eax, [rsp]
or eax, HEX(000400)
and eax, HEX(00f7ff)
mov [rsp + 8], eax
fldcw [rsp + 8]
/* Round to integer */ /* Shift right by 31, so that only the sign bit remains in rax */
frndint shr rax, 31
/* Restore fpu control word */ /* Subtract the sign bit from the truncated value, so that
fldcw [rsp] we get the correct result for negative values. */
sub rcx, rax
/* Convert the result back to xmm0 (single precision) */
cvtsi2ss xmm0, rcx
fstp dword ptr [rsp]
movss xmm0, dword ptr [rsp]
add rsp, 16 add rsp, 16
ret ret
ENDFUNC floorf ENDFUNC floorf