[CRT:MATH] Implement x86 sse2 math functions

These are just wrappers around the normal functions and lack any optimization.
This commit is contained in:
Timo Kreuzer 2025-05-11 10:44:01 +03:00
parent 5c6912f561
commit 0e5d6af68e
3 changed files with 300 additions and 32 deletions

View file

@ -133,27 +133,27 @@
@ cdecl __iswcsym(long)
@ cdecl __iswcsymf(long)
@ stdcall -arch=arm __jump_unwind(ptr ptr) ntdll.__jump_unwind
@ cdecl -stub -arch=i386 -norelay __libm_sse2_acos()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_acosf()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_asin()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_asinf()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_atan()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_atan2()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_atanf()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_cos()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_cosf()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_exp()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_expf()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_log()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_log10()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_log10f()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_logf()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_pow()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_powf()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_sin()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_sinf()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_tan()
@ cdecl -stub -arch=i386 -norelay __libm_sse2_tanf()
@ cdecl -arch=i386 -norelay __libm_sse2_acos()
@ cdecl -arch=i386 -norelay __libm_sse2_acosf()
@ cdecl -arch=i386 -norelay __libm_sse2_asin()
@ cdecl -arch=i386 -norelay __libm_sse2_asinf()
@ cdecl -arch=i386 -norelay __libm_sse2_atan()
@ cdecl -arch=i386 -norelay __libm_sse2_atan2()
@ cdecl -arch=i386 -norelay __libm_sse2_atanf()
@ cdecl -arch=i386 -norelay __libm_sse2_cos()
@ cdecl -arch=i386 -norelay __libm_sse2_cosf()
@ cdecl -arch=i386 -norelay __libm_sse2_exp()
@ cdecl -arch=i386 -norelay __libm_sse2_expf()
@ cdecl -arch=i386 -norelay __libm_sse2_log()
@ cdecl -arch=i386 -norelay __libm_sse2_log10()
@ cdecl -arch=i386 -norelay __libm_sse2_log10f()
@ cdecl -arch=i386 -norelay __libm_sse2_logf()
@ cdecl -arch=i386 -norelay __libm_sse2_pow()
@ cdecl -arch=i386 -norelay __libm_sse2_powf()
@ cdecl -arch=i386 -norelay __libm_sse2_sin()
@ cdecl -arch=i386 -norelay __libm_sse2_sinf()
@ cdecl -arch=i386 -norelay __libm_sse2_tan()
@ cdecl -arch=i386 -norelay __libm_sse2_tanf()
@ cdecl __p___argc()
@ cdecl __p___argv()
@ cdecl __p___wargv()
@ -602,17 +602,17 @@
@ cdecl _ldunscale(ptr ptr) _dunscale
@ cdecl _lfind(ptr ptr ptr long ptr)
@ cdecl _lfind_s(ptr ptr ptr long ptr ptr)
@ cdecl -stub -arch=i386 -norelay _libm_sse2_acos_precise() #__libm_sse2_acos
@ cdecl -stub -arch=i386 -norelay _libm_sse2_asin_precise() #__libm_sse2_asin
@ cdecl -stub -arch=i386 -norelay _libm_sse2_atan_precise() #__libm_sse2_atan
@ cdecl -stub -arch=i386 -norelay _libm_sse2_cos_precise() #__libm_sse2_cos
@ cdecl -stub -arch=i386 -norelay _libm_sse2_exp_precise() #__libm_sse2_exp
@ cdecl -stub -arch=i386 -norelay _libm_sse2_log10_precise() #__libm_sse2_log10
@ cdecl -stub -arch=i386 -norelay _libm_sse2_log_precise() #__libm_sse2_log
@ cdecl -stub -arch=i386 -norelay _libm_sse2_pow_precise() #__libm_sse2_pow
@ cdecl -stub -arch=i386 -norelay _libm_sse2_sin_precise() #__libm_sse2_sin
@ cdecl -stub -arch=i386 -norelay _libm_sse2_sqrt_precise() #__libm_sse2_sqrt
@ cdecl -stub -arch=i386 -norelay _libm_sse2_tan_precise() #__libm_sse2_tan
@ cdecl -arch=i386 -norelay _libm_sse2_acos_precise()
@ cdecl -arch=i386 -norelay _libm_sse2_asin_precise()
@ cdecl -arch=i386 -norelay _libm_sse2_atan_precise()
@ cdecl -arch=i386 -norelay _libm_sse2_cos_precise()
@ cdecl -arch=i386 -norelay _libm_sse2_exp_precise()
@ cdecl -arch=i386 -norelay _libm_sse2_log10_precise()
@ cdecl -arch=i386 -norelay _libm_sse2_log_precise()
@ cdecl -arch=i386 -norelay _libm_sse2_pow_precise()
@ cdecl -arch=i386 -norelay _libm_sse2_sin_precise()
@ cdecl -arch=i386 -norelay _libm_sse2_sqrt_precise()
@ cdecl -arch=i386 -norelay _libm_sse2_tan_precise()
@ cdecl _loaddll(str)
@ cdecl -arch=win64 _local_unwind(ptr ptr) ntdll._local_unwind
@ cdecl -arch=i386 _local_unwind2(ptr long)

View file

@ -0,0 +1,267 @@
/*
* PROJECT: ReactOS CRT
* LICENSE: MIT (https://spdx.org/licenses/MIT)
* PURPOSE: Simplified implementation of __libm_sse2_*
* COPYRIGHT: Copyright 2025 Timo Kreuzer <timo.kreuzer@reactos.org>
*/
#include <emmintrin.h>
#include <math.h>
/*
 * Compiler setup for the wrappers in this file.
 *
 * MSVC (but not clang in MSVC mode): "#pragma function" requests real
 * function calls for the listed math routines instead of their intrinsic
 * forms, and __ATTRIBUTE_SSE2__ expands to nothing.
 */
#if defined(_MSC_VER) && !defined(__clang__)
#pragma function(acos,asin,atan,atan2,cos)
#pragma function(exp,log,log10,pow,sin,tan)
#define __ATTRIBUTE_SSE2__
#else
/* All other compilers: compile each annotated function for the SSE2 target. */
#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2")))
#endif
#ifdef __GNUC__
/* Disables the -Wuninitialized diagnostic from this point on. */
#pragma GCC diagnostic ignored "-Wuninitialized"
#endif
/*
 * Arc cosine of the low double of Xmm0.
 * The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_acos(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(acos(arg));
}
/*
 * Single-precision arc cosine wrapper.
 *
 * Widens the low float of Xmm0 to double, evaluates acos() in double
 * precision and narrows the result back to float. The value is returned in
 * the low element; the remaining elements are zero.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_acosf(__m128 Xmm0)
{
    /* Pass a zeroed vector as the upper-lane source of the widening
       conversion so that no uninitialized value is read. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = acos(x);

    /* _mm_set_sd zeroes the upper double, so the narrowed vector is
       (result, 0, 0, 0). */
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
/*
 * Arc sine of the low double of Xmm0.
 * The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_asin(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(asin(arg));
}
/*
 * Single-precision arc sine wrapper.
 *
 * Widens the low float of Xmm0 to double, evaluates asin() in double
 * precision and narrows the result back to float. The value is returned in
 * the low element; the remaining elements are zero.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_asinf(__m128 Xmm0)
{
    /* Pass a zeroed vector as the upper-lane source of the widening
       conversion so that no uninitialized value is read. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = asin(x);

    /* _mm_set_sd zeroes the upper double, so the narrowed vector is
       (result, 0, 0, 0). */
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
/*
 * Arc tangent of the low double of Xmm0.
 * The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_atan(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(atan(arg));
}
/*
 * Single-precision arc tangent wrapper.
 *
 * Widens the low float of Xmm0 to double, evaluates atan() in double
 * precision and narrows the result back to float. The value is returned in
 * the low element; the remaining elements are zero.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_atanf(__m128 Xmm0)
{
    /* Pass a zeroed vector as the upper-lane source of the widening
       conversion so that no uninitialized value is read. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = atan(x);

    /* _mm_set_sd zeroes the upper double, so the narrowed vector is
       (result, 0, 0, 0). */
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
/*
 * Two-argument arc tangent.
 *
 * The low double of Xmm0 is passed as the first argument of atan2() (the
 * "y" operand) and the low double of Xmm1 as the second (the "x" operand).
 * The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_atan2(__m128d Xmm0, __m128d Xmm1)
{
    const double first = _mm_cvtsd_f64(Xmm0);
    const double second = _mm_cvtsd_f64(Xmm1);

    return _mm_set_sd(atan2(first, second));
}
/*
 * Cosine of the low double of Xmm0.
 * The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_cos(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(cos(arg));
}
/*
 * Single-precision cosine wrapper.
 *
 * Widens the low float of Xmm0 to double, evaluates cos() in double
 * precision and narrows the result back to float. The value is returned in
 * the low element; the remaining elements are zero.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_cosf(__m128 Xmm0)
{
    /* Pass a zeroed vector as the upper-lane source of the widening
       conversion so that no uninitialized value is read. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = cos(x);

    /* _mm_set_sd zeroes the upper double, so the narrowed vector is
       (result, 0, 0, 0). */
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
/*
 * Natural exponential of the low double of Xmm0.
 * The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_exp(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(exp(arg));
}
/*
 * Single-precision exponential wrapper.
 *
 * Widens the low float of Xmm0 to double, evaluates exp() in double
 * precision and narrows the result back to float. The value is returned in
 * the low element; the remaining elements are zero.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_expf(__m128 Xmm0)
{
    /* Pass a zeroed vector as the upper-lane source of the widening
       conversion so that no uninitialized value is read. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = exp(x);

    /* _mm_set_sd zeroes the upper double, so the narrowed vector is
       (result, 0, 0, 0). */
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
/*
 * Natural logarithm of the low double of Xmm0.
 * The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_log(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(log(arg));
}
/*
 * Single-precision natural logarithm wrapper.
 *
 * Widens the low float of Xmm0 to double, evaluates log() in double
 * precision and narrows the result back to float. The value is returned in
 * the low element; the remaining elements are zero.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_logf(__m128 Xmm0)
{
    /* Pass a zeroed vector as the upper-lane source of the widening
       conversion so that no uninitialized value is read. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = log(x);

    /* _mm_set_sd zeroes the upper double, so the narrowed vector is
       (result, 0, 0, 0). */
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
/*
 * Base-10 logarithm of the low double of Xmm0.
 * The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_log10(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(log10(arg));
}
/*
 * Single-precision base-10 logarithm wrapper.
 *
 * Widens the low float of Xmm0 to double, evaluates log10() in double
 * precision and narrows the result back to float. The value is returned in
 * the low element; the remaining elements are zero.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_log10f(__m128 Xmm0)
{
    /* Pass a zeroed vector as the upper-lane source of the widening
       conversion so that no uninitialized value is read. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = log10(x);

    /* _mm_set_sd zeroes the upper double, so the narrowed vector is
       (result, 0, 0, 0). */
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
/*
 * Power function: raises the low double of Xmm0 to the power given by the
 * low double of Xmm1.
 * The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_pow(__m128d Xmm0, __m128d Xmm1)
{
    const double base = _mm_cvtsd_f64(Xmm0);
    const double exponent = _mm_cvtsd_f64(Xmm1);

    return _mm_set_sd(pow(base, exponent));
}
/*
 * Single-precision power function: raises the low float of Xmm0 to the
 * power given by the low float of Xmm1 using powf().
 * The value is returned in the low element; the other elements are zero.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_powf(__m128 Xmm0, __m128 Xmm1)
{
    const float base = _mm_cvtss_f32(Xmm0);
    const float exponent = _mm_cvtss_f32(Xmm1);

    return _mm_set_ss(powf(base, exponent));
}
/*
 * Sine of the low double of Xmm0.
 * The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_sin(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(sin(arg));
}
/*
 * Single-precision sine wrapper.
 *
 * Widens the low float of Xmm0 to double, evaluates sin() in double
 * precision and narrows the result back to float. The value is returned in
 * the low element; the remaining elements are zero.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_sinf(__m128 Xmm0)
{
    /* Pass a zeroed vector as the upper-lane source of the widening
       conversion so that no uninitialized value is read. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = sin(x);

    /* _mm_set_sd zeroes the upper double, so the narrowed vector is
       (result, 0, 0, 0). */
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
/*
 * Tangent of the low double of Xmm0.
 * The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_tan(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(tan(arg));
}
/*
 * Single-precision tangent wrapper.
 *
 * Widens the low float of Xmm0 to double, evaluates tan() in double
 * precision and narrows the result back to float. The value is returned in
 * the low element; the remaining elements are zero.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_tanf(__m128 Xmm0)
{
    /* Pass a zeroed vector as the upper-lane source of the widening
       conversion so that no uninitialized value is read. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = tan(x);

    /* _mm_set_sd zeroes the upper double, so the narrowed vector is
       (result, 0, 0, 0). */
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
/*
 * "Precise" arc cosine entry point: applies acos() to the low double of
 * Xmm0. The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_acos_precise(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(acos(arg));
}
/*
 * "Precise" arc sine entry point: applies asin() to the low double of
 * Xmm0. The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_asin_precise(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(asin(arg));
}
/*
 * "Precise" arc tangent entry point: applies atan() to the low double of
 * Xmm0. The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_atan_precise(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(atan(arg));
}
/*
 * "Precise" cosine entry point: applies cos() to the low double of Xmm0.
 * The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_cos_precise(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(cos(arg));
}
/*
 * "Precise" exponential entry point: applies exp() to the low double of
 * Xmm0. The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_exp_precise(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(exp(arg));
}
/*
 * "Precise" natural logarithm entry point: applies log() to the low double
 * of Xmm0. The value is returned in the low element; the upper element is
 * zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_log_precise(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(log(arg));
}
/*
 * "Precise" base-10 logarithm entry point: applies log10() to the low
 * double of Xmm0. The value is returned in the low element; the upper
 * element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_log10_precise(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(log10(arg));
}
/*
 * "Precise" power entry point: raises the low double of Xmm0 to the power
 * given by the low double of Xmm1 using pow().
 * The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_pow_precise(__m128d Xmm0, __m128d Xmm1)
{
    const double base = _mm_cvtsd_f64(Xmm0);
    const double exponent = _mm_cvtsd_f64(Xmm1);

    return _mm_set_sd(pow(base, exponent));
}
/*
 * "Precise" sine entry point: applies sin() to the low double of Xmm0.
 * The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_sin_precise(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(sin(arg));
}
/*
 * Every other math routine wrapped in this file is listed in a
 * "#pragma function" directive for MSVC; sqrt was the one omission. Request
 * a real call here as well so this wrapper is built the same way as its
 * siblings.
 * NOTE(review): presumably this also keeps MSVC from lowering the sqrt()
 * call below back into a call to this very function - confirm against the
 * compiler's code generation.
 */
#if defined(_MSC_VER) && !defined(__clang__)
#pragma function(sqrt)
#endif

/*
 * "Precise" square root entry point: applies sqrt() to the low double of
 * Xmm0. The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_sqrt_precise(__m128d Xmm0)
{
    double x = _mm_cvtsd_f64(Xmm0);
    double result = sqrt(x);
    return _mm_set_sd(result);
}
/*
 * "Precise" tangent entry point: applies tan() to the low double of Xmm0.
 * The value is returned in the low element; the upper element is zero.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_tan_precise(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);

    return _mm_set_sd(tan(arg));
}

View file

@ -24,6 +24,7 @@ if(ARCH STREQUAL "i386")
math/i386/cisin.c
math/i386/cisqrt.c
math/i386/ldexp.c
math/i386/libm_sse2.c
)
list(APPEND LIBCNTPR_MATH_ASM_SOURCE
math/i386/alldiv_asm.s