- implement generic C version of cos(). On amd64 machines it will compile into very optimized SSE2 code. No need to use assembly.

svn path=/branches/ros-amd64-bringup/; revision=45290
This commit is contained in:
Timo Kreuzer 2010-01-27 23:42:34 +00:00
parent 9c9f4a9fd7
commit c21ada51d6
4 changed files with 90 additions and 23 deletions

View file

@ -161,13 +161,13 @@
<file>jn_yn.c</file>-->
</if>
<if property="ARCH" value="amd64">
<file>cos.c</file>
<directory name="amd64">
<file>alldiv.S</file>
<file>atan.S</file>
<file>atan2.S</file>
<file>ceil.S</file>
<file>ceilf.S</file>
<file>cos.S</file>
<file>exp.S</file>
<file>fabs.S</file>
<file>floor.S</file>

View file

@ -64,7 +64,6 @@
<file>atan.S</file>
<file>atan2.S</file>
<file>ceil.S</file>
<file>cos.S</file>
<file>exp.S</file>
<file>fabs.S</file>
<file>floor.S</file>

View file

@ -1,21 +0,0 @@
/*
* COPYRIGHT: See COPYING in the top level directory
* PROJECT: ReactOS system libraries
* PURPOSE: Implementation of cos
* FILE: lib/sdk/crt/math/amd64/cos.S
* PROGRAMMER: Timo Kreuzer (timo.kreuzer@reactos.org)
*/
/* INCLUDES ******************************************************************/
#include <ndk/amd64/asm.h>
#include <ndk/amd64/asmmacro.S>
.intel_syntax noprefix
.proc cos
UNIMPLEMENTED cos
ret
.endproc

View file

@ -0,0 +1,89 @@
/*
* COPYRIGHT: See COPYING in the top level directory
* PROJECT: ReactOS CRT
* FILE: lib/crt/math/cos.c
* PURPOSE: Generic C Implementation of cos
* PROGRAMMER: Timo Kreuzer (timo.kreuzer@reactos.org)
*/
#define PRECISION 9
#define M_PI 3.141592653589793238462643
static double cos_off_tbl[] = {0.0, -M_PI/2., 0, -M_PI/2.};
static double cos_sign_tbl[] = {1,-1,-1,1};
double
cos(double x)
{
int quadrant;
double x2, result;
/* Calculate the quadrant */
quadrant = x * (2./M_PI);
/* Get offset inside quadrant */
x = x - quadrant * (M_PI/2.);
/* Normalize quadrant to [0..3] */
quadrant = quadrant & 0x3;
/* Fixup value for the generic function */
x += cos_off_tbl[quadrant];
/* Calculate the negative of the square of x */
x2 = - (x * x);
/* This is an unrolled taylor series using <PRECISION> iterations
* Example with 4 iterations:
* result = 1 - x^2/2! + x^4/4! - x^6/6! + x^8/8!
* To save multiplications and to keep the precision high, it's performed
* like this:
* result = 1 - x^2 * (1/2! - x^2 * (1/4! - x^2 * (1/6! - x^2 * (1/8!))))
*/
/* Start with 0, compiler will optimize this away */
result = 0;
#if (PRECISION >= 10)
result += 1./(1.*2*3*4*5*6*7*8*9*10*11*12*13*14*15*16*17*18*20);
result *= x2;
#endif
#if (PRECISION >= 9)
result += 1./(1.*2*3*4*5*6*7*8*9*10*11*12*13*14*15*16*17*18);
result *= x2;
#endif
#if (PRECISION >= 8)
result += 1./(1.*2*3*4*5*6*7*8*9*10*11*12*13*14*15*16);
result *= x2;
#endif
#if (PRECISION >= 7)
result += 1./(1.*2*3*4*5*6*7*8*9*10*11*12*13*14);
result *= x2;
#endif
#if (PRECISION >= 6)
result += 1./(1.*2*3*4*5*6*7*8*9*10*11*12);
result *= x2;
#endif
#if (PRECISION >= 5)
result += 1./(1.*2*3*4*5*6*7*8*9*10);
result *= x2;
#endif
result += 1./(1.*2*3*4*5*6*7*8);
result *= x2;
result += 1./(1.*2*3*4*5*6);
result *= x2;
result += 1./(1.*2*3*4);
result *= x2;
result += 1./(1.*2);
result *= x2;
result += 1;
/* Apply correct sign */
result *= cos_sign_tbl[quadrant];
return result;
}