mirror of
https://github.com/reactos/reactos.git
synced 2025-07-31 10:01:43 +00:00
[CRT:MATH] Implement x86 sse2 math functions
These are just wrappers around the normal functions and lack any optimization.
This commit is contained in:
parent
5c6912f561
commit
0e5d6af68e
3 changed files with 300 additions and 32 deletions
|
@ -133,27 +133,27 @@
|
|||
@ cdecl __iswcsym(long)
|
||||
@ cdecl __iswcsymf(long)
|
||||
@ stdcall -arch=arm __jump_unwind(ptr ptr) ntdll.__jump_unwind
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_acos()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_acosf()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_asin()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_asinf()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_atan()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_atan2()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_atanf()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_cos()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_cosf()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_exp()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_expf()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_log()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_log10()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_log10f()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_logf()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_pow()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_powf()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_sin()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_sinf()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_tan()
|
||||
@ cdecl -stub -arch=i386 -norelay __libm_sse2_tanf()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_acos()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_acosf()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_asin()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_asinf()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_atan()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_atan2()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_atanf()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_cos()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_cosf()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_exp()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_expf()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_log()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_log10()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_log10f()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_logf()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_pow()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_powf()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_sin()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_sinf()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_tan()
|
||||
@ cdecl -arch=i386 -norelay __libm_sse2_tanf()
|
||||
@ cdecl __p___argc()
|
||||
@ cdecl __p___argv()
|
||||
@ cdecl __p___wargv()
|
||||
|
@ -602,17 +602,17 @@
|
|||
@ cdecl _ldunscale(ptr ptr) _dunscale
|
||||
@ cdecl _lfind(ptr ptr ptr long ptr)
|
||||
@ cdecl _lfind_s(ptr ptr ptr long ptr ptr)
|
||||
@ cdecl -stub -arch=i386 -norelay _libm_sse2_acos_precise() #__libm_sse2_acos
|
||||
@ cdecl -stub -arch=i386 -norelay _libm_sse2_asin_precise() #__libm_sse2_asin
|
||||
@ cdecl -stub -arch=i386 -norelay _libm_sse2_atan_precise() #__libm_sse2_atan
|
||||
@ cdecl -stub -arch=i386 -norelay _libm_sse2_cos_precise() #__libm_sse2_cos
|
||||
@ cdecl -stub -arch=i386 -norelay _libm_sse2_exp_precise() #__libm_sse2_exp
|
||||
@ cdecl -stub -arch=i386 -norelay _libm_sse2_log10_precise() #__libm_sse2_log10
|
||||
@ cdecl -stub -arch=i386 -norelay _libm_sse2_log_precise() #__libm_sse2_log
|
||||
@ cdecl -stub -arch=i386 -norelay _libm_sse2_pow_precise() #__libm_sse2_pow
|
||||
@ cdecl -stub -arch=i386 -norelay _libm_sse2_sin_precise() #__libm_sse2_sin
|
||||
@ cdecl -stub -arch=i386 -norelay _libm_sse2_sqrt_precise() #__libm_sse2_sqrt
|
||||
@ cdecl -stub -arch=i386 -norelay _libm_sse2_tan_precise() #__libm_sse2_tan
|
||||
@ cdecl -arch=i386 -norelay _libm_sse2_acos_precise()
|
||||
@ cdecl -arch=i386 -norelay _libm_sse2_asin_precise()
|
||||
@ cdecl -arch=i386 -norelay _libm_sse2_atan_precise()
|
||||
@ cdecl -arch=i386 -norelay _libm_sse2_cos_precise()
|
||||
@ cdecl -arch=i386 -norelay _libm_sse2_exp_precise()
|
||||
@ cdecl -arch=i386 -norelay _libm_sse2_log10_precise()
|
||||
@ cdecl -arch=i386 -norelay _libm_sse2_log_precise()
|
||||
@ cdecl -arch=i386 -norelay _libm_sse2_pow_precise()
|
||||
@ cdecl -arch=i386 -norelay _libm_sse2_sin_precise()
|
||||
@ cdecl -arch=i386 -norelay _libm_sse2_sqrt_precise()
|
||||
@ cdecl -arch=i386 -norelay _libm_sse2_tan_precise()
|
||||
@ cdecl _loaddll(str)
|
||||
@ cdecl -arch=win64 _local_unwind(ptr ptr) ntdll._local_unwind
|
||||
@ cdecl -arch=i386 _local_unwind2(ptr long)
|
||||
|
|
267
sdk/lib/crt/math/i386/libm_sse2.c
Normal file
267
sdk/lib/crt/math/i386/libm_sse2.c
Normal file
|
@ -0,0 +1,267 @@
|
|||
/*
|
||||
* PROJECT: ReactOS CRT
|
||||
* LICENSE: MIT (https://spdx.org/licenses/MIT)
|
||||
* PURPOSE: Simplified implementation of __libm_sse2_*
|
||||
* COPYRIGHT: Copyright 2025 Timo Kreuzer <timo.kreuzer@reactos.org>
|
||||
*/
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include <math.h>
|
||||
|
||||
/*
 * Compiler-specific setup for the wrappers in this file.
 *
 * MSVC (but not clang in MSVC mode): the listed math functions are put under
 * '#pragma function', which asks the compiler to emit real calls to them
 * rather than intrinsic expansions (presumably so the wrappers below are not
 * compiled into calls to themselves -- confirm). __ATTRIBUTE_SSE2__ expands
 * to nothing.
 *
 * All other compilers: __ATTRIBUTE_SSE2__ expands to a per-function
 * target("sse2") attribute, which is applied to every wrapper below.
 */
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#pragma function(acos,asin,atan,atan2,cos)
|
||||
#pragma function(exp,log,log10,pow,sin,tan)
|
||||
#define __ATTRIBUTE_SSE2__
|
||||
#else
|
||||
#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2")))
|
||||
#endif
|
||||
|
||||
/* GCC-compatible compilers: ignore -Wuninitialized from this point onwards
   (there is no matching push/pop, so it stays in effect for the rest of the
   file). */
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic ignored "-Wuninitialized"
|
||||
#endif
|
||||
|
||||
/*
 * Double-precision arc cosine wrapper.
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns acos() of that value in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_acos(__m128d Xmm0)
{
    return _mm_set_sd(acos(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * Single-precision arc cosine wrapper.
 *
 * Xmm0: argument in the lowest float lane (the other lanes are ignored).
 * Returns acos() of that value, converted to float, in the lowest lane;
 * the remaining lanes are zero.
 *
 * The value is widened to double and evaluated with acos().
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_acosf(__m128 Xmm0)
{
    /* Use a zeroed vector as the pass-through operand: only the low lane is
       consumed below, but the operand must still be an initialized value. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = acos(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
|
||||
|
||||
/*
 * Double-precision arc sine wrapper.
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns asin() of that value in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_asin(__m128d Xmm0)
{
    return _mm_set_sd(asin(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * Single-precision arc sine wrapper.
 *
 * Xmm0: argument in the lowest float lane (the other lanes are ignored).
 * Returns asin() of that value, converted to float, in the lowest lane;
 * the remaining lanes are zero.
 *
 * The value is widened to double and evaluated with asin().
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_asinf(__m128 Xmm0)
{
    /* Use a zeroed vector as the pass-through operand: only the low lane is
       consumed below, but the operand must still be an initialized value. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = asin(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
|
||||
|
||||
/*
 * Double-precision arc tangent wrapper.
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns atan() of that value in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_atan(__m128d Xmm0)
{
    return _mm_set_sd(atan(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * Single-precision arc tangent wrapper.
 *
 * Xmm0: argument in the lowest float lane (the other lanes are ignored).
 * Returns atan() of that value, converted to float, in the lowest lane;
 * the remaining lanes are zero.
 *
 * The value is widened to double and evaluated with atan().
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_atanf(__m128 Xmm0)
{
    /* Use a zeroed vector as the pass-through operand: only the low lane is
       consumed below, but the operand must still be an initialized value. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = atan(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
|
||||
|
||||
/*
 * Double-precision two-argument arc tangent wrapper.
 *
 * Xmm0: low double lane is forwarded as the first argument of atan2().
 * Xmm1: low double lane is forwarded as the second argument of atan2().
 * Returns the atan2() result in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_atan2(__m128d Xmm0, __m128d Xmm1)
{
    /* Locals follow the atan2(y, x) naming of the C library prototype. */
    double y = _mm_cvtsd_f64(Xmm0);
    double x = _mm_cvtsd_f64(Xmm1);

    return _mm_set_sd(atan2(y, x));
}
|
||||
|
||||
/*
 * Double-precision cosine wrapper.
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns cos() of that value in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_cos(__m128d Xmm0)
{
    return _mm_set_sd(cos(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * Single-precision cosine wrapper.
 *
 * Xmm0: argument in the lowest float lane (the other lanes are ignored).
 * Returns cos() of that value, converted to float, in the lowest lane;
 * the remaining lanes are zero.
 *
 * The value is widened to double and evaluated with cos().
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_cosf(__m128 Xmm0)
{
    /* Use a zeroed vector as the pass-through operand: only the low lane is
       consumed below, but the operand must still be an initialized value. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = cos(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
|
||||
|
||||
/*
 * Double-precision exponential wrapper.
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns exp() of that value in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_exp(__m128d Xmm0)
{
    return _mm_set_sd(exp(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * Single-precision exponential wrapper.
 *
 * Xmm0: argument in the lowest float lane (the other lanes are ignored).
 * Returns exp() of that value, converted to float, in the lowest lane;
 * the remaining lanes are zero.
 *
 * The value is widened to double and evaluated with exp().
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_expf(__m128 Xmm0)
{
    /* Use a zeroed vector as the pass-through operand: only the low lane is
       consumed below, but the operand must still be an initialized value. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = exp(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
|
||||
|
||||
/*
 * Double-precision natural logarithm wrapper.
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns log() of that value in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_log(__m128d Xmm0)
{
    return _mm_set_sd(log(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * Single-precision natural logarithm wrapper.
 *
 * Xmm0: argument in the lowest float lane (the other lanes are ignored).
 * Returns log() of that value, converted to float, in the lowest lane;
 * the remaining lanes are zero.
 *
 * The value is widened to double and evaluated with log().
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_logf(__m128 Xmm0)
{
    /* Use a zeroed vector as the pass-through operand: only the low lane is
       consumed below, but the operand must still be an initialized value. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = log(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
|
||||
|
||||
/*
 * Double-precision base-10 logarithm wrapper.
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns log10() of that value in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_log10(__m128d Xmm0)
{
    return _mm_set_sd(log10(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * Single-precision base-10 logarithm wrapper.
 *
 * Xmm0: argument in the lowest float lane (the other lanes are ignored).
 * Returns log10() of that value, converted to float, in the lowest lane;
 * the remaining lanes are zero.
 *
 * The value is widened to double and evaluated with log10().
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_log10f(__m128 Xmm0)
{
    /* Use a zeroed vector as the pass-through operand: only the low lane is
       consumed below, but the operand must still be an initialized value. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = log10(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
|
||||
|
||||
/*
 * Double-precision power wrapper.
 *
 * Xmm0: base in the low double lane.
 * Xmm1: exponent in the low double lane.
 * Returns pow(base, exponent) in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_pow(__m128d Xmm0, __m128d Xmm1)
{
    double base = _mm_cvtsd_f64(Xmm0);
    double exponent = _mm_cvtsd_f64(Xmm1);

    return _mm_set_sd(pow(base, exponent));
}
|
||||
|
||||
/*
 * Single-precision power wrapper.
 *
 * Xmm0: base in the lowest float lane.
 * Xmm1: exponent in the lowest float lane.
 * Returns powf(base, exponent) in the lowest lane; the other lanes are zero.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_powf(__m128 Xmm0, __m128 Xmm1)
{
    float base = _mm_cvtss_f32(Xmm0);
    float exponent = _mm_cvtss_f32(Xmm1);

    return _mm_set_ss(powf(base, exponent));
}
|
||||
|
||||
/*
 * Double-precision sine wrapper.
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns sin() of that value in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_sin(__m128d Xmm0)
{
    return _mm_set_sd(sin(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * Single-precision sine wrapper.
 *
 * Xmm0: argument in the lowest float lane (the other lanes are ignored).
 * Returns sin() of that value, converted to float, in the lowest lane;
 * the remaining lanes are zero.
 *
 * The value is widened to double and evaluated with sin().
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_sinf(__m128 Xmm0)
{
    /* Use a zeroed vector as the pass-through operand: only the low lane is
       consumed below, but the operand must still be an initialized value. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = sin(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
|
||||
|
||||
/*
 * Double-precision tangent wrapper.
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns tan() of that value in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_tan(__m128d Xmm0)
{
    return _mm_set_sd(tan(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * Single-precision tangent wrapper.
 *
 * Xmm0: argument in the lowest float lane (the other lanes are ignored).
 * Returns tan() of that value, converted to float, in the lowest lane;
 * the remaining lanes are zero.
 *
 * The value is widened to double and evaluated with tan().
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_tanf(__m128 Xmm0)
{
    /* Use a zeroed vector as the pass-through operand: only the low lane is
       consumed below, but the operand must still be an initialized value. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = tan(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
|
||||
|
||||
/*
 * "Precise" double-precision arc cosine entry point; forwards to acos().
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns the result in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_acos_precise(__m128d Xmm0)
{
    return _mm_set_sd(acos(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * "Precise" double-precision arc sine entry point; forwards to asin().
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns the result in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_asin_precise(__m128d Xmm0)
{
    return _mm_set_sd(asin(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * "Precise" double-precision arc tangent entry point; forwards to atan().
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns the result in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_atan_precise(__m128d Xmm0)
{
    return _mm_set_sd(atan(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * "Precise" double-precision cosine entry point; forwards to cos().
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns the result in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_cos_precise(__m128d Xmm0)
{
    return _mm_set_sd(cos(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * "Precise" double-precision exponential entry point; forwards to exp().
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns the result in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_exp_precise(__m128d Xmm0)
{
    return _mm_set_sd(exp(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * "Precise" double-precision natural logarithm entry point; forwards to
 * log().
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns the result in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_log_precise(__m128d Xmm0)
{
    return _mm_set_sd(log(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * "Precise" double-precision base-10 logarithm entry point; forwards to
 * log10().
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns the result in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_log10_precise(__m128d Xmm0)
{
    return _mm_set_sd(log10(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * "Precise" double-precision power entry point; forwards to pow().
 *
 * Xmm0: base in the low double lane.
 * Xmm1: exponent in the low double lane.
 * Returns pow(base, exponent) in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_pow_precise(__m128d Xmm0, __m128d Xmm1)
{
    double base = _mm_cvtsd_f64(Xmm0);
    double exponent = _mm_cvtsd_f64(Xmm1);

    return _mm_set_sd(pow(base, exponent));
}
|
||||
|
||||
/*
 * "Precise" double-precision sine entry point; forwards to sin().
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns the result in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_sin_precise(__m128d Xmm0)
{
    return _mm_set_sd(sin(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * "Precise" double-precision square root entry point; forwards to sqrt().
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns the result in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_sqrt_precise(__m128d Xmm0)
{
    return _mm_set_sd(sqrt(_mm_cvtsd_f64(Xmm0)));
}
|
||||
|
||||
/*
 * "Precise" double-precision tangent entry point; forwards to tan().
 *
 * Xmm0: argument in the low double lane (the high lane is ignored).
 * Returns the result in the low lane; the high lane is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_tan_precise(__m128d Xmm0)
{
    return _mm_set_sd(tan(_mm_cvtsd_f64(Xmm0)));
}
|
|
@ -24,6 +24,7 @@ if(ARCH STREQUAL "i386")
|
|||
math/i386/cisin.c
|
||||
math/i386/cisqrt.c
|
||||
math/i386/ldexp.c
|
||||
math/i386/libm_sse2.c
|
||||
)
|
||||
list(APPEND LIBCNTPR_MATH_ASM_SOURCE
|
||||
math/i386/alldiv_asm.s
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue