123
This commit is contained in:
163
CryAnimation/QuaternionExponentX87.c
Normal file
163
CryAnimation/QuaternionExponentX87.c
Normal file
@@ -0,0 +1,163 @@
|
||||
#include "QuaternionExponentX87.h"
|
||||
|
||||
#ifdef _CPU_X86
|
||||
static const float fEpsilon = 1e-4f;
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// x87 asm optimized quaternion exponent
|
||||
// PARAMETERS:
|
||||
// pSrcVector[IN] - the vector to calculate the exponent for
|
||||
// pDstQuat [OUT]- the quaternion (exponent of the input)
|
||||
// NOTE:
|
||||
// The input vector mimics a quaternion with 0 real component (W)
|
||||
// This version uses FSINCOS, which takes ~70% of execution time
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
void quaternionExponent_x87(const float* pSrc, float* pDst)
|
||||
{
|
||||
_asm
|
||||
{
|
||||
mov ESI, pSrc
|
||||
mov EDI, pDst
|
||||
|
||||
// double d = sqrt( double(pSrc[0])*pSrc[1] + double(pSrc[1])*pSrc[1] + double(pSrc[2])*pSrc[2]);
|
||||
|
||||
fld [ESI+8]
|
||||
fld [ESI+4]
|
||||
fld [ESI ]
|
||||
fld ST(0)
|
||||
fmul ST(0),ST(0)
|
||||
fld ST(2)
|
||||
fmul ST(0),ST(0)
|
||||
faddp ST(1),ST(0)
|
||||
fld ST(3)
|
||||
fmul ST(0),ST(0)
|
||||
faddp ST(1),ST(0)
|
||||
|
||||
// ST(0): x^2+y^2+z^2 == d^2
|
||||
// ST(1): x
|
||||
// ST(2): y
|
||||
// ST(3): z
|
||||
|
||||
fld fEpsilon
|
||||
fcomip ST, ST(1)
|
||||
jnc small_rotation // this path is almost never taken
|
||||
|
||||
fsqrt
|
||||
fld ST(0)
|
||||
|
||||
// now we need cos, sin to replace the current value
|
||||
fsincos
|
||||
// STACK: cos, sin, sqrt, x, y, z
|
||||
|
||||
fstp dword ptr [EDI]
|
||||
fdivrp ST(1),ST(0) // STACK: sin(d)/d, x,y,z
|
||||
|
||||
fmul ST(1),ST(0)
|
||||
fmul ST(2),ST(0)
|
||||
fmulp ST(3),ST(0)
|
||||
fstp dword ptr [EDI+ 4]
|
||||
fstp dword ptr [EDI+ 8]
|
||||
fstp dword ptr [EDI+12]
|
||||
}
|
||||
return;
|
||||
_asm
|
||||
{
|
||||
small_rotation:
|
||||
fld1
|
||||
fsubrp ST(1),ST(0)
|
||||
fstp dword ptr [EDI ]
|
||||
fstp dword ptr [EDI+4]
|
||||
fstp dword ptr [EDI+8]
|
||||
fstp dword ptr [EDI+12]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static const float
|
||||
fDivBy2 = 1/2.0f,
|
||||
fDivBy3 = 1/3.0f,
|
||||
fDivBy4 = 1/4.0f,
|
||||
fDivBy5 = 1/5.0f,
|
||||
fDivBy6 = 1/6.0f,
|
||||
fDivBy7 = 1/7.0f,
|
||||
fDivBy8 = 1/8.0f,
|
||||
fDivBy9 = 1/9.0f;
|
||||
|
||||
// Takes pSrc: the x,y,z of the imaginary part of the quaternion 0+xi+yj+zk to calculate the exponent
|
||||
// into pDst: the x,y,z,w of the resulting quaternion IN THAT ORDER
|
||||
void quaternionExponent_x87approx(const float* pSrc, float* pDst)
|
||||
{
|
||||
_asm
|
||||
{
|
||||
mov ESI, pSrc
|
||||
mov EDI, pDst
|
||||
|
||||
// double d = sqrt( double(pSrc[0])*pSrc[1] + double(pSrc[1])*pSrc[1] + double(pSrc[2])*pSrc[2]);
|
||||
|
||||
fld [ESI+8]
|
||||
fld [ESI+4]
|
||||
fld [ESI ]
|
||||
fld1
|
||||
fld1
|
||||
// STACK: 1 1 x y z
|
||||
fld ST(2)
|
||||
fmul ST(0),ST(0)
|
||||
fld ST(4)
|
||||
fmul ST(0),ST(0)
|
||||
faddp ST(1),ST(0)
|
||||
fld ST(5)
|
||||
fmul ST(0),ST(0)
|
||||
faddp ST(1),ST(0)
|
||||
|
||||
// ST(0): x^2+y^2+z^2 == d^2
|
||||
// ST(1): 1
|
||||
// ST(2): 1
|
||||
// ST(3): x
|
||||
// ST(4): y
|
||||
// ST(5): z
|
||||
|
||||
// now we need cos, sin to replace the current value
|
||||
fld ST(0)
|
||||
// STACK: D^2(temp), D^2(const), 1(will be cos), 1(will be sin/D), x, y, z
|
||||
fmul fDivBy2
|
||||
fsub ST(2),ST(0)
|
||||
fmul fDivBy3
|
||||
fsub ST(3),ST(0)
|
||||
|
||||
// STACK: D^2/3!, D^2, 1-D^2/2!(will be cos), 1-D^2/3!(will be sin/D), x, y, z
|
||||
|
||||
fmul ST(0),ST(1)
|
||||
fmul fDivBy4
|
||||
fadd ST(2),ST(0)
|
||||
fmul fDivBy5
|
||||
fadd ST(3),ST(0)
|
||||
|
||||
// STACK: D^4/5!, D^2, 1-D^2/2!+D^4/4!(will be cos), 1-D^2/3!+D^4/5!(will be sin/D), x, y, z
|
||||
fmul ST(0),ST(1)
|
||||
fmul fDivBy6
|
||||
fsub ST(2),ST(0)
|
||||
fmul fDivBy7
|
||||
fsub ST(3),ST(0)
|
||||
|
||||
// STACK: D^6/7!, D^2, 1-D^2/2!+D^4/4!-D^6/6!, 1-D^2/3!+D^4/5!-D^6/7!, x, y, z
|
||||
|
||||
// the last step
|
||||
fmulp ST(1),ST(0)
|
||||
// STACK: D^8/7!, 1-D^2/2!+D^4/4!-D^6/6!, 1-D^2/3!+D^4/5!-D^6/7!, x, y, z
|
||||
fmul fDivBy8
|
||||
fadd ST(1),ST(0)
|
||||
fmul fDivBy9
|
||||
faddp ST(2),ST(0)
|
||||
// STACK: 1-D^2/2!+D^4/4!-D^6/6!+D^8/8!, 1-D^2/3!+D^4/5!-D^6/7!+D^8/9!, x, y, z
|
||||
// cos(D), sin(D)/D,x,y,z
|
||||
|
||||
fstp dword ptr [EDI]
|
||||
|
||||
fmul ST(1),ST(0)
|
||||
fmul ST(2),ST(0)
|
||||
fmulp ST(3),ST(0)
|
||||
fstp dword ptr [EDI+ 4]
|
||||
fstp dword ptr [EDI+ 8]
|
||||
fstp dword ptr [EDI+12]
|
||||
}
|
||||
}
|
||||
#endif
|
||||
Reference in New Issue
Block a user