461 lines
12 KiB
C++
461 lines
12 KiB
C++
#include "stdafx.h"
|
|
//#include "CryAnimationBase.h"
|
|
#include "CrySkinRigidBasis.h"
|
|
|
|
#define FOR_TEST 0
|
|
|
|
#if FOR_TEST
|
|
#include "CryAnimation.h"
|
|
#include "CVars.h"
|
|
#endif
|
|
|
|
// returns the size of the skin, the number of bases being calculated
|
|
// by this skin. The bases are calculated into a 0-base continuous array
|
|
// tangents may be divided into subskins, each having different number of bases
|
|
// to skin, based on the performance consideration (strip mining)
|
|
unsigned CrySkinRigidBasis::size()const
|
|
{
|
|
return m_numDestBases;
|
|
}
|
|
|
|
// does the same as the base class init() but also remembers the number of bases (numVerts/2)
|
|
// for future reference
|
|
void CrySkinRigidBasis::init (unsigned numVerts, unsigned numAux, unsigned numSkipBones, unsigned numBones)
|
|
{
|
|
m_numDestBases = numVerts >> 1;
|
|
CrySkinBase::init (numVerts, numAux, numSkipBones, numBones);
|
|
}
|
|
|
|
|
|
void CrySkinRigidBasis::CStatistics::initSetDests (const CrySkinRigidBasis* pSkin)
|
|
{
|
|
const CrySkinAuxInt* pAux = &pSkin->m_arrAux[0];
|
|
const Vertex* pVertex = &pSkin->m_arrVertices[0];
|
|
setDests.clear();
|
|
arrNumLinks.clear();
|
|
|
|
for (unsigned nBone = pSkin->m_numSkipBones; nBone < pSkin->m_numBones; ++nBone)
|
|
{
|
|
// each bone has a group of (always rigid) vertices
|
|
|
|
// this is to take into account two groups: non-flipped and flipped tangents
|
|
for (int t = 0; t < 2; ++t)
|
|
{
|
|
// for each actual basis, we have two vertices
|
|
const Vertex* pGroupEnd = pVertex + (*pAux++<<1);
|
|
for (;pVertex < pGroupEnd; pVertex+=2)
|
|
{
|
|
unsigned nDestOffset = pVertex[0].nDest & 0xFFFFFF;
|
|
assert (nDestOffset < pSkin->m_numDestBases*sizeof(SPipTangentsA) && nDestOffset % sizeof(SPipTangentsA) == 0);
|
|
unsigned nDest = nDestOffset / sizeof(SPipTangentsA);
|
|
addDest(nDest);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void CrySkinRigidBasis::CStatistics::addDest(unsigned nDest)
|
|
{
|
|
if (arrNumLinks.size() < nDest+1)
|
|
arrNumLinks.resize (nDest+1,0);
|
|
++arrNumLinks[nDest];
|
|
setDests.insert (nDest);
|
|
}
|
|
|
|
|
|
// does the skinning out of the given array of global matrices:
|
|
// calculates the bases and fills the PipVertices in
|
|
void CrySkinRigidBasis::skin (const Matrix44* pBones, SPipTangentsA* pDest)const
|
|
{
|
|
#ifdef DEFINE_PROFILER_FUNCTION
|
|
DEFINE_PROFILER_FUNCTION();
|
|
#endif
|
|
|
|
#if FOR_TEST
|
|
for (int i = 0; i < g_GetCVars()->ca_TestSkinningRepeats(); ++i)
|
|
#endif
|
|
{
|
|
const Matrix44* pBone = pBones + m_numSkipBones, *pBonesEnd = pBones + m_numBones;
|
|
const CrySkinAuxInt* pAux = &m_arrAux[0];
|
|
const Vertex* pVertex = &m_arrVertices[0];
|
|
|
|
for (; pBone!= pBonesEnd; ++pBone)
|
|
{
|
|
// each bone has a group of (always rigid) vertices
|
|
|
|
// for each actual basis, we have two vertices
|
|
const Vertex* pGroupEnd = pVertex + (*pAux++<<1);
|
|
for (;pVertex < pGroupEnd; pVertex+=2)
|
|
{
|
|
unsigned nDestOffset = pVertex[0].nDest & 0xFFFFFF;
|
|
assert (nDestOffset < m_numDestBases*sizeof(SPipTangentsA));
|
|
SPipTangentsA& rDest = *(SPipTangentsA*)(UINT_PTR(pDest) + nDestOffset);
|
|
Vec3d vTang = pBone->TransformVectorOLD(pVertex[0].pt);
|
|
Vec3d vBinorm = pBone->TransformVectorOLD(pVertex[1].pt);
|
|
rDest.m_Tangent = vTang;
|
|
rDest.m_Binormal = vBinorm;
|
|
rDest.m_TNormal = vTang^vBinorm;
|
|
}
|
|
|
|
// the flipped version
|
|
pGroupEnd = pVertex + (*pAux++<<1);
|
|
for (;pVertex < pGroupEnd; pVertex+=2)
|
|
{
|
|
unsigned nDestOffset = pVertex[0].nDest & 0xFFFFFF;
|
|
assert (nDestOffset < m_numDestBases*sizeof(SPipTangentsA));
|
|
SPipTangentsA& rDest = *(SPipTangentsA*)(UINT_PTR(pDest) + nDestOffset);
|
|
Vec3d vTang = pBone->TransformVectorOLD(pVertex[0].pt);
|
|
Vec3d vBinorm = pBone->TransformVectorOLD(pVertex[1].pt);
|
|
|
|
rDest.m_Tangent = vTang;
|
|
rDest.m_Binormal = vBinorm;
|
|
rDest.m_TNormal = vBinorm^vTang;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
#if defined(_CPU_X86) && !defined(LINUX)
|
|
// uses SSE for skinning; NOTE: EVERYTHING must be 16-aligned:
|
|
// destination, bones, and the data in this object
|
|
void CrySkinRigidBasis::skinSSE (const Matrix44* pBones, SPipTangentsA* pDest)const
|
|
{
|
|
#ifdef DEFINE_PROFILER_FUNCTION
|
|
DEFINE_PROFILER_FUNCTION();
|
|
#endif
|
|
|
|
#if defined(_DEBUG) && FOR_TEST
|
|
TElementaryArray<SPipTangentsA> arrTest ("CrySkinRigidBasis::skinSSE");
|
|
arrTest.reinit(size());
|
|
skin (pBones, &arrTest[0]);
|
|
#endif
|
|
|
|
#if FOR_TEST
|
|
for (int i = 0; i < g_GetCVars()->ca_TestSkinningRepeats(); ++i)
|
|
#endif
|
|
{
|
|
const Matrix44* pBone = pBones + m_numSkipBones, *pBoneEnd = pBones + m_numBones;
|
|
const CrySkinAuxInt* pAux = &m_arrAux[0];
|
|
const Vertex* pVertex = &m_arrVertices[0];
|
|
|
|
_asm
|
|
{
|
|
mov EDX, pAux
|
|
mov EBX, pVertex
|
|
mov EDI, pDest
|
|
mov ESI, pBone
|
|
|
|
// load the current matrix; we don't need the move component
|
|
startLoop:
|
|
cmp ESI, pBoneEnd
|
|
jz endLoop
|
|
movaps xmm0, [ESI]
|
|
movaps xmm1, [ESI+0x10]
|
|
movaps xmm2, [ESI+0x20]
|
|
add ESI, 0x40
|
|
|
|
// load the counter for the number of non-flipped tangets for this bone
|
|
#if CRY_SKIN_AUX_INT_SIZE==2
|
|
xor ECX,ECX
|
|
mov CX, word ptr [EDX]
|
|
add EDX, 2
|
|
#else
|
|
mov ECX, dword ptr [EDX]
|
|
add EDX, 4
|
|
#endif
|
|
test ECX, ECX
|
|
jz endLoopNonflipped
|
|
|
|
startLoopNonflipped:
|
|
// load the tangent vector
|
|
movaps xmm7, [EBX]
|
|
// load the binormal
|
|
// calculate the destination pointer
|
|
mov EAX, [EBX+0xC]
|
|
and EAX, 0xFFFFFF
|
|
add EAX, EDI
|
|
// EAX points to the destination triplet of vectors now
|
|
movaps xmm6, [EBX+0x10]
|
|
add EBX, 0x20
|
|
//prefetchnta [EBX]
|
|
|
|
// calculate the transformed tangent and binormal
|
|
movss xmm5, xmm7
|
|
shufps xmm5, xmm5, 0 // xmm5 = 4 copies of tangent.x
|
|
mulps xmm5, xmm0
|
|
movaps xmm4, xmm7
|
|
shufps xmm4, xmm4, 0x55 // xmm4 = 4 copies of tangent.y
|
|
mulps xmm4, xmm1
|
|
shufps xmm7, xmm7, 0xAA // xmm7 = 4 copies of tangent.z
|
|
mulps xmm7, xmm2
|
|
addps xmm7, xmm4
|
|
addps xmm7, xmm5
|
|
// xmm7 = transformed tangent
|
|
#if sizeofSPipTangentsA == 0x30
|
|
movaps [EAX], xmm7
|
|
#else
|
|
//SSE_MOVSS(EAX,xmm7)
|
|
movss [EAX], xmm7
|
|
shufps xmm7,xmm7, 0x39 // roll right
|
|
movss [EAX+4], xmm7
|
|
shufps xmm7,xmm7, 0x39 // roll right
|
|
movss [EAX+8], xmm7
|
|
shufps xmm7, xmm7, 0x4E // roll left twice
|
|
#endif
|
|
|
|
// transform the binormal
|
|
movss xmm5, xmm6
|
|
shufps xmm5, xmm5, 0
|
|
mulps xmm5, xmm0
|
|
movaps xmm4, xmm6
|
|
shufps xmm4, xmm4, 0x55
|
|
mulps xmm4, xmm1
|
|
shufps xmm6, xmm6, 0xAA
|
|
mulps xmm6, xmm2
|
|
addps xmm6, xmm4
|
|
addps xmm6, xmm5
|
|
// xmm6 = transformed binormal
|
|
#if sizeofSPipTangentsA == 0x30
|
|
movaps [EAX+0x10], xmm6
|
|
#else
|
|
//SSE_MOVSS(EAX+0xC,xmm7)
|
|
movss [EAX+0xC], xmm6
|
|
shufps xmm6,xmm6, 0x39
|
|
movss [EAX+0xC+4], xmm6
|
|
shufps xmm6,xmm6, 0x39
|
|
movss [EAX+0xC+8], xmm6
|
|
shufps xmm6, xmm6, 0x4E // roll left twice
|
|
#endif
|
|
|
|
// calculate the cross product tangent (xmm7)^binormal (xmm6)
|
|
movaps xmm5, xmm7
|
|
shufps xmm5, xmm5, 0x09 // roll right 3-base
|
|
movaps xmm4, xmm6
|
|
shufps xmm4, xmm4, 0x12 // roll left 3-base
|
|
//shufps xmm4, xmm4, 0x09 // roll right 3-base
|
|
mulps xmm5, xmm4
|
|
|
|
shufps xmm7, xmm7, 0x12 // roll left 3-base
|
|
shufps xmm6, xmm6, 0x09 // roll right 3-base
|
|
//shufps xmm7, xmm7, 0x09 // roll right 3-base
|
|
mulps xmm7, xmm6
|
|
subps xmm5, xmm7
|
|
//shufps xmm5,xmm5, 0x09
|
|
#if sizeofSPipTangentsA == 0x30
|
|
movaps [EAX+0x20], xmm5
|
|
#else
|
|
//SSE_MOVSS(EAX+0x18,xmm5)
|
|
movss [EAX+0x18], xmm5
|
|
shufps xmm5,xmm5, 0x39
|
|
movss [EAX+0x18+4], xmm5
|
|
shufps xmm5,xmm5, 0x39
|
|
movss [EAX+0x18+8], xmm5
|
|
shufps xmm5, xmm5, 0x4E // roll left twice
|
|
#endif
|
|
|
|
dec ECX
|
|
jnz startLoopNonflipped
|
|
//loop startLoopNonflipped
|
|
endLoopNonflipped:
|
|
|
|
//////////////////////////////////////////////////////////
|
|
// Flipped loop
|
|
|
|
// load the counter for the number of flipped tangets for this bone
|
|
#if CRY_SKIN_AUX_INT_SIZE==2
|
|
xor ECX,ECX
|
|
mov CX, word ptr [EDX]
|
|
add EDX, 2
|
|
#else
|
|
mov ECX, dword ptr [EDX]
|
|
add EDX, 4
|
|
#endif
|
|
test ECX, ECX
|
|
jz endLoopFlipped
|
|
|
|
startLoopFlipped:
|
|
// load the tangent vector
|
|
movaps xmm7, [EBX]
|
|
// load the binormal
|
|
movaps xmm6, [EBX+0x10]
|
|
// calculate the destination pointer
|
|
mov EAX, [EBX+0xC]
|
|
and EAX, 0xFFFFFF
|
|
add EAX, EDI
|
|
// EAX points to the destination triplet of vectors now
|
|
add EBX, 0x20
|
|
//prefetchnta [EBX]
|
|
|
|
// calculate the transformed tangent and binormal
|
|
movss xmm5, xmm7
|
|
shufps xmm5, xmm5, 0 // xmm5 = 4 copies of tangent.x
|
|
mulps xmm5, xmm0
|
|
movaps xmm4, xmm7
|
|
shufps xmm4, xmm4, 0x55 // xmm4 = 4 copies of tangent.y
|
|
mulps xmm4, xmm1
|
|
shufps xmm7, xmm7, 0xAA // xmm7 = 4 copies of tangent.z
|
|
mulps xmm7, xmm2
|
|
addps xmm7, xmm4
|
|
addps xmm7, xmm5
|
|
// xmm7 = transformed tangent
|
|
#if sizeofSPipTangentsA == 0x30
|
|
movaps [EAX], xmm7
|
|
#else
|
|
//SSE_MOVSS(EAX,xmm7)
|
|
movss [EAX], xmm7
|
|
shufps xmm7,xmm7, 0x39
|
|
movss [EAX+4], xmm7
|
|
shufps xmm7,xmm7, 0x39
|
|
movss [EAX+8], xmm7
|
|
shufps xmm7, xmm7, 0x4E // roll left twice
|
|
#endif
|
|
|
|
// transform the binormal
|
|
movss xmm5, xmm6
|
|
shufps xmm5, xmm5, 0
|
|
mulps xmm5, xmm0
|
|
movaps xmm4, xmm6
|
|
shufps xmm4, xmm4, 0x55
|
|
mulps xmm4, xmm1
|
|
shufps xmm6, xmm6, 0xAA
|
|
mulps xmm6, xmm2
|
|
addps xmm6, xmm4
|
|
addps xmm6, xmm5
|
|
// xmm6 = transformed binormal
|
|
#if sizeofSPipTangentsA == 0x30
|
|
movaps [EAX+0x10], xmm6
|
|
#else
|
|
//SSE_MOVSS(EAX+0xC,xmm6)
|
|
movss [EAX+0xC], xmm6
|
|
shufps xmm6,xmm6, 0x39
|
|
movss [EAX+0xC+4], xmm6
|
|
shufps xmm6,xmm6, 0x39
|
|
movss [EAX+0xC+8], xmm6
|
|
shufps xmm6, xmm6, 0x4E // roll left twice
|
|
#endif
|
|
|
|
// calculate the cross product binormal (xmm6)^tangent (xmm7)
|
|
movaps xmm5, xmm7
|
|
shufps xmm5, xmm5, 0x09 // roll right 3-base
|
|
movaps xmm4, xmm6
|
|
shufps xmm4, xmm4, 0x12 // roll left 3-base
|
|
//shufps xmm4, xmm4, 0x09 // roll right 3-base
|
|
mulps xmm5, xmm4
|
|
|
|
shufps xmm7, xmm7, 0x12 // roll left 3-base
|
|
shufps xmm6, xmm6, 0x09 // roll right 3-base
|
|
//shufps xmm7, xmm7, 0x09 // roll right 3-base
|
|
mulps xmm7, xmm6
|
|
subps xmm7, xmm5
|
|
//shufps xmm7,xmm7, 9
|
|
#if sizeofSPipTangentsA == 0x30
|
|
movaps [EAX+0x20], xmm7
|
|
#else
|
|
//SSE_MOVSS(EAX+0x18,xmm7)
|
|
movss [EAX+0x18], xmm7
|
|
shufps xmm7,xmm7, 0x39
|
|
movss [EAX+0x18+4], xmm7
|
|
shufps xmm7,xmm7, 0x39
|
|
movss [EAX+0x18+8], xmm7
|
|
shufps xmm7, xmm7, 0x4E // roll left twice
|
|
#endif
|
|
|
|
dec ECX
|
|
jnz startLoopFlipped
|
|
//loop startLoopFlipped
|
|
endLoopFlipped:
|
|
jmp startLoop
|
|
endLoop:
|
|
}
|
|
}
|
|
#if defined(_DEBUG) && FOR_TEST
|
|
unsigned numBases = 0;
|
|
|
|
for (unsigned nBone = m_numSkipBones; nBone < m_numBones; ++nBone)
|
|
{
|
|
assert (numBases < size());
|
|
const CrySkinAuxInt* pAux = &m_arrAux[(nBone-m_numSkipBones)*2];
|
|
SPipTangentsA* pBTest;
|
|
SPipTangentsA* pBDest;
|
|
const Vertex*pVertex = &m_arrVertices[numBases*2];
|
|
|
|
// check the non-flipped bases
|
|
unsigned i, j;
|
|
float dT, dB, dN;
|
|
for (i = 0; i < pAux[0]; ++i, ++numBases, pVertex+=2)
|
|
{
|
|
pBTest = (SPipTangentsA*)(((unsigned)&arrTest[0])+(pVertex->nDest&0xFFFFFF));
|
|
pBDest = (SPipTangentsA*)(((unsigned)pDest)+(pVertex->nDest&0xFFFFFF));
|
|
dT = Distance2(pBTest->m_Tangent,pBDest->m_Tangent);
|
|
dB = Distance2(pBTest->m_Binormal,pBDest->m_Binormal);
|
|
dN = Distance2(pBTest->m_TNormal,pBDest->m_TNormal);
|
|
assert (dT < 1e-6 && dB < 1e-6 && dN < 1e-6);
|
|
}
|
|
for (j = 0; j < pAux[1]; ++j, ++numBases, pVertex+=2)
|
|
{
|
|
pBTest = (SPipTangentsA*)(((unsigned)&arrTest[0])+(pVertex->nDest&0xFFFFFF));
|
|
pBDest = (SPipTangentsA*)(((unsigned)pDest)+(pVertex->nDest&0xFFFFFF));
|
|
dT = Distance2(pBTest->m_Tangent,pBDest->m_Tangent);
|
|
dB = Distance2(pBTest->m_Binormal,pBDest->m_Binormal);
|
|
dN = Distance2(pBTest->m_TNormal,pBDest->m_TNormal);
|
|
assert (dT < 1e-6 && dB < 1e-6 && dN < 1e-6);
|
|
}
|
|
}
|
|
assert (numBases == size());
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
// returns the number of bytes occupied by this structure and all its contained objects
|
|
unsigned CrySkinRigidBasis::sizeofThis()const
|
|
{
|
|
return CrySkinBase::sizeofThis() + sizeof(CrySkinRigidBasis) - sizeof(CrySkinBase);
|
|
}
|
|
|
|
unsigned CrySkinRigidBasis::Serialize (bool bSave, void* pBuffer, unsigned nBufSize)
|
|
{
|
|
if (bSave)
|
|
{
|
|
unsigned nWrittenBytes = CrySkinBase::Serialize_PC(true, pBuffer, nBufSize);
|
|
if (nWrittenBytes)
|
|
{
|
|
if (pBuffer)
|
|
*(unsigned*)(((char*)pBuffer)+nWrittenBytes) = m_numDestBases;
|
|
|
|
return sizeof(unsigned) + nWrittenBytes;
|
|
}
|
|
else
|
|
{
|
|
// error
|
|
return 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
unsigned nReadBytes = CrySkinBase::Serialize_PC(false, pBuffer, nBufSize);
|
|
if (nReadBytes)
|
|
{
|
|
if (nBufSize - nReadBytes >= sizeof(unsigned))
|
|
{
|
|
m_numDestBases = *(unsigned*)(((char*)pBuffer)+nReadBytes);
|
|
return nReadBytes + sizeof(unsigned);
|
|
}
|
|
else
|
|
{
|
|
//error - perhaps not the tang stream
|
|
m_numDestBases = 0;
|
|
clear();
|
|
return 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
//error
|
|
return 0;
|
|
}
|
|
}
|
|
}
|