From 2779f4ccddaa343a8290d75ceb0a80017ace6fe8 Mon Sep 17 00:00:00 2001
From: Timo Kreuzer <timo.kreuzer@reactos.org>
Date: Wed, 28 Sep 2011 21:32:37 +0000
Subject: [PATCH] [CRT] implement floor and floorf for amd64 with sse2
 instructions

svn path=/trunk/; revision=53887
---
 reactos/lib/sdk/crt/libcntpr.cmake      |  1 +
 reactos/lib/sdk/crt/math/amd64/floor.S  | 25 +++++++++++++++++---
 reactos/lib/sdk/crt/math/amd64/floorf.S | 31 +++++++++++--------------
 3 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/reactos/lib/sdk/crt/libcntpr.cmake b/reactos/lib/sdk/crt/libcntpr.cmake
index 8fa7984e4d9..01e5858841b 100644
--- a/reactos/lib/sdk/crt/libcntpr.cmake
+++ b/reactos/lib/sdk/crt/libcntpr.cmake
@@ -111,6 +111,7 @@ elseif(ARCH MATCHES amd64)
         math/amd64/exp.S
         math/amd64/fabs.S
         math/amd64/floor.S
+        math/amd64/floorf.S
         math/amd64/fmod.S
         math/amd64/ldexp.S
         math/amd64/log.S
diff --git a/reactos/lib/sdk/crt/math/amd64/floor.S b/reactos/lib/sdk/crt/math/amd64/floor.S
index ee39d77955f..ca0343fab7e 100644
--- a/reactos/lib/sdk/crt/math/amd64/floor.S
+++ b/reactos/lib/sdk/crt/math/amd64/floor.S
@@ -9,14 +9,33 @@
 /* INCLUDES ******************************************************************/
 
 #include <asm.inc>
-#include <ksamd64.inc>
 
 /* CODE **********************************************************************/
 .code64
 
 PUBLIC floor
-floor:
-    UNIMPLEMENTED floor
+FUNC floor
+    sub rsp, 16
+    .ENDPROLOG
+
+    /* double floor(double x): x and result in xmm0; uses rax, rcx, xmm1.
+       |x| >= 2^52, Inf and NaN are already integral: return x unchanged. */
+    movd rax, xmm0
+    shl rax, 1                  /* drop the sign bit */
+    shr rax, 53                 /* rax = biased exponent (11 bits) */
+    cmp eax, 1075               /* 1075 = bias 1023 + 52 mantissa bits */
+    jae floor_done
+
+    /* Truncate, then step down by one only when truncation rounded up */
+    cvttsd2si rcx, xmm0         /* rcx = trunc(x), rounds toward zero */
+    cvtsi2sd xmm1, rcx          /* xmm1 = (double)trunc(x), exact here */
+    ucomisd xmm0, xmm1          /* CF = 1 iff x < trunc(x) (negative fraction) */
+    sbb rcx, 0                  /* floor = trunc - CF */
+    cvtsi2sd xmm0, rcx          /* back to double (-0.0 becomes +0.0) */
+
+floor_done:
+    add rsp, 16
     ret
+ENDFUNC floor
 
 END
diff --git a/reactos/lib/sdk/crt/math/amd64/floorf.S b/reactos/lib/sdk/crt/math/amd64/floorf.S
index 68ab5fb9696..e3b30bad4d4 100644
--- a/reactos/lib/sdk/crt/math/amd64/floorf.S
+++ b/reactos/lib/sdk/crt/math/amd64/floorf.S
@@ -1,7 +1,7 @@
 /*
  * COPYRIGHT:         See COPYING in the top level directory
  * PROJECT:           ReactOS system libraries
- * PURPOSE:           Implementation of tan
+ * PURPOSE:           Implementation of floorf
  * FILE:              lib/sdk/crt/math/amd64/floorf.S
  * PROGRAMMER:        Timo Kreuzer (timo.kreuzer@reactos.org)
  */
@@ -9,7 +9,6 @@
 /* INCLUDES ******************************************************************/
 
 #include <asm.inc>
-#include <ksamd64.inc>
 
 /* CODE **********************************************************************/
 .code64
@@ -19,26 +18,22 @@ FUNC floorf
     sub rsp, 16
     .ENDPROLOG
 
-    /* Put parameter on the stack */
-    movss dword ptr [rsp], xmm0
-    fld   dword ptr [rsp]
+    /* float floorf(float x): x and result in xmm0; uses rax, rcx, xmm1 */
 
-    /* Change fpu control word to round down */
-    fstcw [rsp]
-    mov   eax, [rsp]
-    or    eax, HEX(000400)
-    and   eax, HEX(00f7ff)
-    mov   [rsp + 8], eax
-    fldcw [rsp + 8]
+    /* |x| >= 2^23, Inf and NaN are already integral: return x unchanged */
+    movd eax, xmm0
+    btr eax, 31                 /* eax = bit pattern of |x| */
+    cmp eax, HEX(4b000000)      /* 0x4B000000 = bit pattern of 2^23 */
+    jae floorf_done
 
-    /* Round to integer */
-    frndint
+    cvttss2si rcx, xmm0         /* rcx = trunc(x), rounds toward zero */
+    cvtsi2ss xmm1, rcx          /* xmm1 = (float)trunc(x), exact here */
 
-    /* Restore fpu control word */
-    fldcw [rsp]
+    ucomiss xmm0, xmm1          /* CF = 1 iff x < trunc(x) (negative fraction) */
+    sbb rcx, 0                  /* floor = trunc - CF */
+    cvtsi2ss xmm0, rcx          /* back to float (-0.0 becomes +0.0) */
 
-    fstp  dword ptr [rsp]
-    movss xmm0, dword ptr [rsp]
+floorf_done:
     add rsp, 16
     ret
 ENDFUNC floorf