mirror of
https://github.com/reactos/reactos.git
synced 2024-07-30 08:08:56 +00:00
[CRT]
- Remove x64 asm stub for acos from cmake file, since we already have a generic C implementation - Implement sqrt for amd64 in SSE, both in C and asm. While the C version would be sufficient, it's currently less portable due to the lack of mm intrinsics for GCC - Silence a warning svn path=/trunk/; revision=58251
This commit is contained in:
parent
57f9b8f9ab
commit
1edcc31339
|
@ -433,7 +433,7 @@ elseif(ARCH STREQUAL "amd64")
|
||||||
float/amd64/getsetfpcw.S
|
float/amd64/getsetfpcw.S
|
||||||
float/amd64/fpreset.S
|
float/amd64/fpreset.S
|
||||||
float/amd64/logb.S
|
float/amd64/logb.S
|
||||||
math/amd64/acos.S
|
# math/amd64/acos.S
|
||||||
math/amd64/acosf.S
|
math/amd64/acosf.S
|
||||||
math/amd64/atan.S
|
math/amd64/atan.S
|
||||||
math/amd64/atan2.S
|
math/amd64/atan2.S
|
||||||
|
|
|
@ -1402,6 +1402,7 @@ char* CDECL setlocale(int category, const char* locale)
|
||||||
if(category == LC_ALL)
|
if(category == LC_ALL)
|
||||||
return construct_lc_all(locinfo);
|
return construct_lc_all(locinfo);
|
||||||
|
|
||||||
|
_Analysis_assume_(category <= 5);
|
||||||
return locinfo->lc_category[category].locale;
|
return locinfo->lc_category[category].locale;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1481,13 +1482,13 @@ MSVCRT__locale_t global_locale = NULL;
|
||||||
void __init_global_locale()
|
void __init_global_locale()
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
LOCK_LOCALE;
|
LOCK_LOCALE;
|
||||||
/* Someone created it before us */
|
/* Someone created it before us */
|
||||||
if(global_locale)
|
if(global_locale)
|
||||||
return;
|
return;
|
||||||
global_locale = MSVCRT__create_locale(0, "C");
|
global_locale = MSVCRT__create_locale(0, "C");
|
||||||
|
|
||||||
__lc_codepage = MSVCRT_locale->locinfo->lc_codepage;
|
__lc_codepage = MSVCRT_locale->locinfo->lc_codepage;
|
||||||
MSVCRT___lc_collate_cp = MSVCRT_locale->locinfo->lc_collate_cp;
|
MSVCRT___lc_collate_cp = MSVCRT_locale->locinfo->lc_collate_cp;
|
||||||
__mb_cur_max = MSVCRT_locale->locinfo->mb_cur_max;
|
__mb_cur_max = MSVCRT_locale->locinfo->mb_cur_max;
|
||||||
|
|
73
reactos/lib/sdk/crt/math/amd64/asin.c
Normal file
73
reactos/lib/sdk/crt/math/amd64/asin.c
Normal file
|
@ -0,0 +1,73 @@
|
||||||
|
/*
|
||||||
|
* COPYRIGHT: See COPYING in the top level directory
|
||||||
|
* PROJECT: ReactOS CRT
|
||||||
|
* FILE: lib/sdk/crt/math/amd64/asin.c
|
||||||
|
* PURPOSE: Generic C implementation of arc sine
|
||||||
|
* PROGRAMMER: Timo Kreuzer (timo.kreuzer@reactos.org)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define PRECISION 9
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The arc sine can be approximated with the following power series:
|
||||||
|
*
|
||||||
|
* asin(x) = a0*x + a1*x^3 + a2*x^5 + a3*x^7 + a4*x^9 + ...
|
||||||
|
*
|
||||||
|
* To reduce the number of multiplications the formula is transformed to
|
||||||
|
*
|
||||||
|
* asin(x) = x * (1 + x^2*(a1 + x^2*(a2 + x^2*(a3 + ...) ) ) )
|
||||||
|
*
|
||||||
|
* The coefficients are:
|
||||||
|
* a0 = 1
|
||||||
|
* a1 = 1/(2*3)
|
||||||
|
* a2 = (3*1)/(4*2*5)
|
||||||
|
* a3 = (5*3*1)/(6*4*2*7)
|
||||||
|
* a4 = (7*5*3*1)/(8*6*4*2*9)
|
||||||
|
* a5 = (9*7*5*3*1)/(10*8*6*4*2*11)
|
||||||
|
* ...
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Approximates the arc sine of x with the first nine terms of its power
 * series (up to and including the x^17 term), evaluated in Horner form.
 *
 * Parameters:
 *   x - the sine value, expected in the range [-1, 1]
 *
 * Returns:
 *   The approximated arc sine in radians. If x lies outside [-1, 1] the
 *   result is a quiet NaN (-1.#IND00). A NaN input fails both range
 *   comparisons and propagates through the arithmetic below.
 *
 * Note: because the series is truncated, the error grows with |x|; it is
 * small near 0 but becomes significant as |x| approaches 1.
 */
double
asin(double x)
{
    double x2, result;

    /* Check range */
    if ((x > 1.) || (x < -1.))
    {
        /* Build the NaN from its bit pattern, so that no header is needed */
        union
        {
            unsigned long long ull;
            double dbl;
        } nan;

        /* -1.#IND00 */
        nan.ull = 0xfff8000000000000ULL;
        return nan.dbl;
    }

    /* Calculate the square of x */
    x2 = (x * x);

    /* Start with the coefficient of the highest term (x^17) and work down,
       multiplying by x^2 after each step */
    result = (15*13*11*9*7*5*3*1./(16*14*12*10*8*6*4*2*17));
    result *= x2;

    result += (13*11*9*7*5*3*1./(14*12*10*8*6*4*2*15));
    result *= x2;

    result += (11*9*7*5*3*1./(12*10*8*6*4*2*13));
    result *= x2;

    result += (9*7*5*3*1./(10*8*6*4*2*11));
    result *= x2;

    result += (7*5*3*1./(8*6*4*2*9));
    result *= x2;

    result += (5*3*1./(6*4*2*7));
    result *= x2;

    result += (3*1./(4*2*5));
    result *= x2;

    result += (1./(2*3));
    result *= x2;

    /* a0 = 1, then the final multiplication by x */
    result += 1.;
    result *= x;

    return result;
}
|
||||||
|
|
|
@ -9,14 +9,57 @@
|
||||||
/* INCLUDES ******************************************************************/
|
/* INCLUDES ******************************************************************/
|
||||||
|
|
||||||
#include <asm.inc>
|
#include <asm.inc>
|
||||||
#include <ksamd64.inc>
|
|
||||||
|
|
||||||
/* CODE **********************************************************************/
|
/* CODE **********************************************************************/
|
||||||
.code64
|
.code64
|
||||||
|
|
||||||
PUBLIC sqrt
/*
 * double sqrt(double x)
 *
 * The argument is read from xmm0 and the result is returned in xmm0.
 * Special cases are decided on the raw 64 bit pattern of x:
 *  - -0.0 is returned unchanged
 *  - any other value with the sign bit set yields -1.#IND00
 *  - a positive NaN is returned with bit 51 set (1.#QNAN0)
 *  - everything else is passed to the hardware square root
 */
sqrt:

    /* Load the sign bit into rdx */
    mov rdx, HEX(8000000000000000)

    /* Move the lower 64 bits of xmm0 into rax */
    movd rax, xmm0

    /* Test the sign bit */
    test rax, rdx

    /* If it is set, go to the failure path */
    jnz x_is_negative

    /* x is positive, now check if it is NaN by checking if the unsigned
       integer value is larger than the bit pattern of +infinity, which is
       the highest valid positive value. */
    mov rcx, HEX(7FF0000000000000)
    cmp rax, rcx
    ja short x_is_nan

    /* All is well, calculate the sqrt.
       NOTE(review): sqrtpd also processes the upper 64 bits of xmm0, only
       the lower 64 bits are used as the result; sqrtsd would leave the
       upper half alone. */
    sqrtpd xmm0, xmm0
    ret

x_is_negative:
    /* Load failure return value (-1.#IND00) into rcx */
    mov rcx, HEX(0FFF8000000000000)

    /* Check if the parameter was -0.0 */
    cmp rax, rdx

    /* If it was not, load the failure value, otherwise keep -0.0 */
    cmovne rax, rcx

    /* Move the value back into the return register */
    movd xmm0, rax
    ret

x_is_nan:
    /* Create a 1.#QNAN0 by setting this bit */
    mov rcx, HEX(8000000000000)
    or rax, rcx

    /* Move the value back into the return register */
    movd xmm0, rax
    ret
|
||||||
|
|
||||||
|
|
||||||
END
|
END
|
||||||
|
|
77
reactos/lib/sdk/crt/math/amd64/sqrt.c
Normal file
77
reactos/lib/sdk/crt/math/amd64/sqrt.c
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
|
||||||
|
#include <intrin.h>
|
||||||
|
|
||||||
|
/*
 * Computes the square root of x using SSE2 instructions.
 *
 * Special cases are decided on the raw 64 bit pattern of x:
 *  - -0.0 is returned unchanged
 *  - any other value with the sign bit set (negative numbers, -infinity,
 *    NaNs with the sign bit set) yields -1.#IND00 (0xfff8000000000000)
 *  - a positive NaN is returned with bit 51 set
 *  - everything else (+0.0, positive finite values, +infinity) is passed
 *    to the hardware square root
 */
double
sqrt (
    double x)
{
    register union
    {
        __m128d x128d;
        __m128i x128i;
    } u ;
    register union
    {
        unsigned long long ullx;
        double dbl;
    } u2;

    /* Set the lower double-precision value of u to x.
       All that we want, is that the compiler understands that we have the
       function parameter in a register that we can address as an __m128.
       Sadly there is no obvious way to do that. If we use the union, VS will
       generate code to store xmm0 in memory and then read it into a GPR.
       We avoid memory access by using a direct move. But even here we won't
       get a simple MOVSD. We can either do:
       a) _mm_set_sd: move x into the lower part of an xmm register and zero
          out the upper part (XORPD+MOVSD)
       b) _mm_set1_pd: move x into the lower and higher part of an xmm register
          (MOVSD+UNPCKLPD)
       c) _mm_set_pd, which either generates a memory access, when we try to
          tell it to keep the upper 64 bits, or generate 2 MOVAPS + UNPCKLPD
       We choose a, which is probably the fastest.
    */
    u.x128d = _mm_set_sd(x);

    /* Move the contents of the lower 64 bit into a 64 bit GPR using MOVD */
    u2.ullx = _mm_cvtsi128_si64(u.x128i);

    /* Check for negative values */
    if (u2.ullx & 0x8000000000000000ULL)
    {
        /* Check if this is *really* negative and not just -0.0 */
        if (u2.ullx != 0x8000000000000000ULL)
        {
            /* Return -1.#IND00 */
            u2.ullx = 0xfff8000000000000ULL;
        }

        /* Return what we have */
        return u2.dbl;
    }

    /* The sign bit is known to be clear here. Check if this is a NaN
       (bits 52-62 are all 1, bits 0-51 are not all 0), which means the
       integer value is larger than the bit pattern of +infinity */
    if (u2.ullx > 0x7FF0000000000000ULL)
    {
        /* Set this bit. That's what MS function does. */
        u2.ullx |= 0x8000000000000ULL;
        return u2.dbl;
    }

    /* Calculate the square root. */
#ifdef _MSC_VER
    /* Another YAY for the MS compiler. There are 2 instructions we could use:
       SQRTPD (computes sqrt for 2 double values) or SQRTSD (computes sqrt for
       only the lower 64 bit double value). Obviously we only need 1. And on
       some architectures SQRTPD is twice as slow as SQRTSD. On the other hand
       the MS compiler is stupid and always generates an additional MOVAPS
       instruction when SQRTSD is used. We choose to use SQRTPD here since on
       modern hardware it's as fast as SQRTSD. */
    u.x128d = _mm_sqrt_pd(u.x128d); // SQRTPD
#else
    u.x128d = _mm_sqrt_sd(u.x128d, u.x128d); // SQRTSD
#endif

    /* Extract the lower double with an intrinsic. The m128d_f64 member is
       specific to the MS definition of __m128d and would not compile in the
       non-MSVC branch above. */
    return _mm_cvtsd_f64(u.x128d);
}
|
Loading…
Reference in a new issue