Files
FC1/CryAnimation/QuaternionExponentX87.c
romkazvo 34d6c5d489 123
2023-08-07 19:29:24 +08:00

164 lines
3.5 KiB
C

#include "QuaternionExponentX87.h"
#ifdef _CPU_X86
// Threshold on the squared vector length (x^2 + y^2 + z^2): at or below this
// value quaternionExponent_x87 takes its small-rotation path instead of
// dividing by the length.
static const float fEpsilon = 1.0e-4f;
//////////////////////////////////////////////////////////////////////////
// x87 asm optimized quaternion exponent
// PARAMETERS:
//   pSrc [IN]  - 3 floats (x, y, z): the vector to calculate the exponent for
//   pDst [OUT] - 4 floats: the quaternion (exponent of the input), written as
//                pDst[0]    = cos(d)
//                pDst[1..3] = (x, y, z) * sin(d)/d
//                where d = sqrt(x^2 + y^2 + z^2)
// NOTE:
//   The input vector mimics a quaternion with 0 real component (W).
//   This version uses FSINCOS, which takes ~70% of execution time
//   (figure from the original author's note).
//   When d^2 <= fEpsilon the division by d is skipped: sin(d)/d is taken
//   as 1 and cos(d) as its second-order Taylor expansion 1 - d^2/2.
//////////////////////////////////////////////////////////////////////////
void quaternionExponent_x87(const float* pSrc, float* pDst)
{
	// 1/2: scales d^2 for the small-rotation cosine approximation
	static const float fHalf = 0.5f;
	_asm
	{
		mov ESI, pSrc
		mov EDI, pDst
		// d^2 = pSrc[0]*pSrc[0] + pSrc[1]*pSrc[1] + pSrc[2]*pSrc[2]
		fld dword ptr [ESI+8]
		fld dword ptr [ESI+4]
		fld dword ptr [ESI]
		// STACK: x, y, z
		fld ST(0)
		fmul ST(0),ST(0)
		fld ST(2)
		fmul ST(0),ST(0)
		faddp ST(1),ST(0)
		fld ST(3)
		fmul ST(0),ST(0)
		faddp ST(1),ST(0)
		// ST(0): x^2+y^2+z^2 == d^2
		// ST(1): x
		// ST(2): y
		// ST(3): z
		fld fEpsilon
		// compares fEpsilon with d^2 and pops fEpsilon; CF is set only when
		// fEpsilon < d^2 (or the operands are unordered)
		fcomip ST, ST(1)
		jnc small_rotation // d^2 <= fEpsilon; this path is almost never taken
		fsqrt
		fld ST(0)
		// now we need cos, sin to replace the current value
		fsincos
		// STACK: cos, sin, d, x, y, z
		fstp dword ptr [EDI]
		fdivrp ST(1),ST(0) // STACK: sin(d)/d, x, y, z
		fmul ST(1),ST(0)
		fmul ST(2),ST(0)
		fmulp ST(3),ST(0)
		// STACK: x*sin(d)/d, y*sin(d)/d, z*sin(d)/d
		fstp dword ptr [EDI+ 4]
		fstp dword ptr [EDI+ 8]
		fstp dword ptr [EDI+12]
	}
	return;
	_asm
	{
	small_rotation:
		// STACK: d^2, x, y, z
		// cos(d) ~= 1 - d^2/2 (the earlier code stored 1 - d^2, which is off
		// by d^2/2 and discontinuous with the FSINCOS path at the threshold)
		fmul fHalf
		fld1
		fsubrp ST(1),ST(0) // STACK: 1 - d^2/2, x, y, z
		fstp dword ptr [EDI ]
		// sin(d)/d ~= 1, so the vector part is copied through unchanged
		fstp dword ptr [EDI+4]
		fstp dword ptr [EDI+8]
		fstp dword ptr [EDI+12]
	}
}
// Reciprocals 1/2 .. 1/9. quaternionExponent_x87approx multiplies a running
// term by these one after another, which accumulates the 1/n! coefficients
// of the cosine and sin(d)/d power series.
static const float fDivBy2 = 1.0f / 2.0f;
static const float fDivBy3 = 1.0f / 3.0f;
static const float fDivBy4 = 1.0f / 4.0f;
static const float fDivBy5 = 1.0f / 5.0f;
static const float fDivBy6 = 1.0f / 6.0f;
static const float fDivBy7 = 1.0f / 7.0f;
static const float fDivBy8 = 1.0f / 8.0f;
static const float fDivBy9 = 1.0f / 9.0f;
// Approximate quaternion exponent: evaluates cos(d) and sin(d)/d with power
// series truncated after the d^8 term instead of calling FSINCOS.
// Takes pSrc: the x,y,z of the imaginary part of the quaternion 0+xi+yj+zk to calculate the exponent
// into pDst: four floats, written as
//   pDst[0]    = cos(d)
//   pDst[1..3] = (x, y, z) * sin(d)/d
// where d^2 = x^2 + y^2 + z^2.
// NOTE(review): this comment previously said the output order was x,y,z,w,
// but the stores below put the cosine term first - confirm against the
// quaternion layout the callers expect.
// No square root or division is performed: both series only need even powers
// of d, so d^2 is used directly and d == 0 needs no special case.
void quaternionExponent_x87approx(const float* pSrc, float* pDst)
{
_asm
{
mov ESI, pSrc
mov EDI, pDst
// d^2 = pSrc[0]*pSrc[0] + pSrc[1]*pSrc[1] + pSrc[2]*pSrc[2]
fld [ESI+8]
fld [ESI+4]
fld [ESI ]
// the two ones are the leading terms of the cos and sin(d)/d series
fld1
fld1
// STACK: 1 1 x y z
fld ST(2)
fmul ST(0),ST(0)
fld ST(4)
fmul ST(0),ST(0)
faddp ST(1),ST(0)
fld ST(5)
fmul ST(0),ST(0)
faddp ST(1),ST(0)
// ST(0): x^2+y^2+z^2 == d^2
// ST(1): 1
// ST(2): 1
// ST(3): x
// ST(4): y
// ST(5): z
// ST(0) becomes a running term: each step multiplies it by d^2 and by the
// next two reciprocals, then adds/subtracts it into the two accumulators
fld ST(0)
// STACK: D^2(temp), D^2(const), 1(will be cos), 1(will be sin/D), x, y, z
fmul fDivBy2
fsub ST(2),ST(0)
fmul fDivBy3
fsub ST(3),ST(0)
// STACK: D^2/3!, D^2, 1-D^2/2!(will be cos), 1-D^2/3!(will be sin/D), x, y, z
fmul ST(0),ST(1)
fmul fDivBy4
fadd ST(2),ST(0)
fmul fDivBy5
fadd ST(3),ST(0)
// STACK: D^4/5!, D^2, 1-D^2/2!+D^4/4!(will be cos), 1-D^2/3!+D^4/5!(will be sin/D), x, y, z
fmul ST(0),ST(1)
fmul fDivBy6
fsub ST(2),ST(0)
fmul fDivBy7
fsub ST(3),ST(0)
// STACK: D^6/7!, D^2, 1-D^2/2!+D^4/4!-D^6/6!, 1-D^2/3!+D^4/5!-D^6/7!, x, y, z
// the last step: D^2 is no longer needed afterwards, so the multiply pops
// the running term onto it
fmulp ST(1),ST(0)
// STACK: D^8/7!, 1-D^2/2!+D^4/4!-D^6/6!, 1-D^2/3!+D^4/5!-D^6/7!, x, y, z
fmul fDivBy8
fadd ST(1),ST(0)
fmul fDivBy9
faddp ST(2),ST(0)
// STACK: 1-D^2/2!+D^4/4!-D^6/6!+D^8/8!, 1-D^2/3!+D^4/5!-D^6/7!+D^8/9!, x, y, z
// cos(D), sin(D)/D,x,y,z
// store the cosine approximation first, then scale x, y, z by sin(D)/D
fstp dword ptr [EDI]
fmul ST(1),ST(0)
fmul ST(2),ST(0)
fmulp ST(3),ST(0)
// STACK: x*sin(D)/D, y*sin(D)/D, z*sin(D)/D
fstp dword ptr [EDI+ 4]
fstp dword ptr [EDI+ 8]
fstp dword ptr [EDI+12]
}
}
#endif