From 604dacb5afde1600fee3f1017ea9d201064c3b26 Mon Sep 17 00:00:00 2001
From: Timo Kreuzer <timo.kreuzer@reactos.org>
Date: Fri, 29 May 2015 20:10:48 +0000
Subject: [PATCH] [CRT] Implement a portable version of sqrt. It's using an
 optimized combination of different algorithms, resulting in code that
 calculates the result in the maximum possible (when internally using double)
 precision with only 11 multiplications and a single division.

svn path=/trunk/; revision=67956
---
 reactos/lib/sdk/crt/crt.cmake       |  1 +
 reactos/lib/sdk/crt/libcntpr.cmake  |  1 -
 reactos/lib/sdk/crt/math/arm/sqrt.s | 24 ----------
 reactos/lib/sdk/crt/math/sqrt.c     | 70 +++++++++++++++++++++++++++++
 4 files changed, 71 insertions(+), 25 deletions(-)
 delete mode 100644 reactos/lib/sdk/crt/math/arm/sqrt.s
 create mode 100644 reactos/lib/sdk/crt/math/sqrt.c

diff --git a/reactos/lib/sdk/crt/crt.cmake b/reactos/lib/sdk/crt/crt.cmake
index 4d08f91998a..df09b26ba8a 100644
--- a/reactos/lib/sdk/crt/crt.cmake
+++ b/reactos/lib/sdk/crt/crt.cmake
@@ -477,6 +477,7 @@ elseif(ARCH STREQUAL "arm")
     list(APPEND CRT_SOURCE
         except/arm/ehandler.c
         math/fabsf.c
+        math/sqrt.c
         math/arm/__rt_sdiv.c
         math/arm/__rt_sdiv64_worker.c
         math/arm/__rt_udiv.c
diff --git a/reactos/lib/sdk/crt/libcntpr.cmake b/reactos/lib/sdk/crt/libcntpr.cmake
index bef8947d169..dc2c1762a34 100644
--- a/reactos/lib/sdk/crt/libcntpr.cmake
+++ b/reactos/lib/sdk/crt/libcntpr.cmake
@@ -165,7 +165,6 @@ elseif(ARCH STREQUAL "arm")
         math/arm/log.s
         math/arm/log10.s
         math/arm/pow.s
-        math/arm/sqrt.s
         math/arm/tan.s
         math/arm/__dtoi64.s
         math/arm/__dtou64.s
diff --git a/reactos/lib/sdk/crt/math/arm/sqrt.s b/reactos/lib/sdk/crt/math/arm/sqrt.s
deleted file mode 100644
index 4852306c85b..00000000000
--- a/reactos/lib/sdk/crt/math/arm/sqrt.s
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * COPYRIGHT:         BSD - See COPYING.ARM in the top level directory
- * PROJECT:           ReactOS CRT library
- * PURPOSE:           Implementation of sqrt
- * PROGRAMMER:        Timo Kreuzer (timo.kreuzer@reactos.org)
- */
-
-/* INCLUDES ******************************************************************/
-
-#include <kxarm.h>
-
-/* CODE **********************************************************************/
-
-    TEXTAREA
-
-    LEAF_ENTRY sqrt
-
-	__assertfail
-	bx	lr
-
-    LEAF_END sqrt
-
-    END
-/* EOF */
diff --git a/reactos/lib/sdk/crt/math/sqrt.c b/reactos/lib/sdk/crt/math/sqrt.c
new file mode 100644
index 00000000000..3b6334467ef
--- /dev/null
+++ b/reactos/lib/sdk/crt/math/sqrt.c
@@ -0,0 +1,70 @@
+/*
+ * COPYRIGHT:       BSD - See COPYING.ARM in the top level directory
+ * PROJECT:         ReactOS CRT library
+ * PURPOSE:         Portable implementation of sqrt
+ * PROGRAMMER:      Timo Kreuzer (timo.kreuzer@reactos.org)
+ */
+
+#include <math.h>
+#include <assert.h>
+
+double
+__cdecl
+sqrt(
+    double x)
+{
+    const double threehalfs = 1.5;
+    const double x2 = x * 0.5;
+    long long bits;
+    double inv, y;
+
+    /* Handle special cases */
+    if (x == 0.0)
+    {
+        return x;
+    }
+    else if (x < 0.0)
+    {
+        return -NAN;
+    }
+
+    /* Convert into a 64  bit integer */
+    bits = *(long long *)&x;
+
+    /* Check for !finite(x) */
+    if ((bits & 0x7ff7ffffffffffffLL) == 0x7ff0000000000000LL)
+    {
+        return x;
+    }
+
+    /* Step 1: quick approximation of 1/sqrt(x) with bit magic
+       See http://en.wikipedia.org/wiki/Fast_inverse_square_root */
+    bits = 0x5fe6eb50c7b537a9ll - (bits >> 1);
+    inv = *(double*)&bits;
+
+    /* Step 2: 3 Newton iterations to approximate 1 / sqrt(x) */
+    inv = inv * (threehalfs - (x2 * inv * inv));
+    inv = inv * (threehalfs - (x2 * inv * inv));
+    inv = inv * (threehalfs - (x2 * inv * inv));
+
+    /* Step 3: 1 additional Heron iteration has shown to maximize the precision.
+       Normally the formula would be: y = (y + (x / y)) * 0.5;
+       Instead we use the inverse sqrt directly */
+    y = ((1 / inv) + (x * inv)) * 0.5;
+
+    //assert(y == (double)((y + (x / y)) * 0.5));
+    /* GCC BUG: While the C-Standard requires that an explicit cast to
+       double converts the result of a computation to the appropriate
+       64 bit value, our GCC ignores this and uses an 80 bit FPU register
+       in an intermediate value, so we need to make sure it is stored in
+       a memory location before comparison */
+#if DBG
+    {
+        volatile double y1 = y, y2;
+        y2 = (y + (x / y)) * 0.5;
+        assert(y1 == y2);
+    }
+#endif
+
+    return y;
+}