Files
FC1/ResourceCompilerPC/CrySkinFull.cpp
romkazvo 34d6c5d489 123
2023-08-07 19:29:24 +08:00

516 lines
14 KiB
C++

#include "stdafx.h"
#include "MathUtils.h"
#include "CrySkinBuilderBase.h"
#include "CrySkinFull.h"
#include "platform.h"
#define FOR_TEST 0
// takes each offset and includes it into the bbox of corresponding bone
/*void CrySkinFull::computeBoneBBoxes(CryBBoxA16* pBBoxes)
{
CrySkinAuxInt* pAux = &m_arrAux[0];
Vertex* pVertex = &m_arrVertices[0];
CryBBoxA16* pBBox = pBBoxes + m_numSkipBones, *pBBoxEnd = pBBoxes + m_numBones;
for (; pBBox!= pBBoxEnd; ++pBBox)
{
// each bone has a group of vertices
// first process the rigid vertices
Vertex* pGroupEnd = pVertex + *pAux++;
for (;pVertex < pGroupEnd; ++pVertex)
pBBox->include(pVertex->pt);
// process the smooth1 vertices that were the first time met
pGroupEnd = pVertex + *pAux++;
for (;pVertex < pGroupEnd; ++pVertex, ++pAux)
pBBox->include(pVertex->pt);
// process the smooth vertices that were the second time met
pGroupEnd = pVertex + *pAux++;
for (;pVertex < pGroupEnd; ++pVertex, ++pAux)
pBBox->include(pVertex->pt);
}
}*/
//////////////////////////////////////////////////////////////////////////
// does the skinning out of the given array of global matrices
void CrySkinFull::skin (const Matrix44* pBones, Vec3d* pDest)
{
#ifdef DEFINE_PROFILER_FUNCTION
DEFINE_PROFILER_FUNCTION();
#endif
//PROFILE_FRAME_SELF(PureSkin);
#if FOR_TEST
for (int i = 0; i < g_GetCVars()->ca_TestSkinningRepeats(); ++i)
#endif
{
const Matrix44* pBone = pBones + m_numSkipBones;
const Matrix44* pBonesEnd = pBones + m_numBones;
u32 s = 0;
u32 t = 0;
#ifdef _DEBUG
TFixedArray<float> arrW;
arrW.reinit(m_numDests, 0);
#endif
for (; pBone!= pBonesEnd; ++pBone)
{
Matrix34 m34 = Matrix34( GetTransposed44(*pBone) );
// first process the rigid vertices
u32 a0=m_arrAux[t];
for (u32 i=0; i<a0; i++ )
{
//_mm_prefetch( (char*)&m_arrVertices[s+20].pt, _MM_HINT_T0 );
pDest[m_arrVertices[s].nDest] = m34 * m_arrVertices[s].pt;
#ifdef _DEBUG
assert (arrW[m_arrVertices[s].nDest] == 0);
arrW[m_arrVertices[s].nDest] = 1;
#endif
s++;
}
t++;
// process the smooth1 vertices that were the first time met
u32 a1=m_arrAux[t]; t++;
for (u32 i=0; i<a1; i++ )
{
//_mm_prefetch( (char*)&m_arrVertices[s+20].pt, _MM_HINT_T0 );
pDest[m_arrAux[t]]= (m34*m_arrVertices[s].pt) * m_arrVertices[s].fWeight;
#ifdef _DEBUG
assert (arrW[m_arrAux[t]] == 0);
arrW[m_arrAux[t]] = m_arrVertices[s].fWeight;
#endif
s++;
t++;
}
// process the smooth vertices that were the first time met
u32 a2=m_arrAux[t]; t++;
for (u32 i=0; i<a2; i++)
{
//_mm_prefetch( (char*)&m_arrVertices[s+20].pt, _MM_HINT_T0 );
pDest[m_arrAux[t]] += (m34*m_arrVertices[s].pt) * m_arrVertices[s].fWeight;
#ifdef _DEBUG
assert (arrW[m_arrAux[t]] > 0 && arrW[m_arrAux[t]] < 1.005f);
arrW[m_arrAux[t]] += m_arrVertices[s].fWeight;
assert (arrW[m_arrAux[t]] > 0 && arrW[m_arrAux[t]] < 1.005f);
#endif
s++;
t++;
}
}
/*#ifdef _DEBUG
for (unsigned i = 0; i < m_numDests; ++i)
assert (arrW[i] > 0.995f && arrW[i] < 1.005f);
#endif
*/
}
}
//////////////////////////////////////////////////////////////////////////
// does the skinning out of the given array of global matrices
void CrySkinFull::skinAsVec3d16 (const Matrix44* pBones, Vec3dA16* pDest)
{
//PROFILE_FRAME_SELF(PureSkin);
#if FOR_TEST
for (int i = 0; i < g_GetCVars()->ca_TestSkinningRepeats(); ++i)
#endif
{
const Matrix44* pBone = pBones + m_numSkipBones, *pBonesEnd = pBones + m_numBones;
CrySkinAuxInt* pAux = &m_arrAux[0];
Vertex* pVertex = &m_arrVertices[0];
#ifdef _DEBUG
TFixedArray<float> arrW;
arrW.reinit(m_numDests, 0);
#endif
for (; pBone!= pBonesEnd; ++pBone)
{
// each bone has a group of vertices
// first process the rigid vertices
Vertex* pGroupEnd = pVertex + *pAux++;
for (;pVertex < pGroupEnd; ++pVertex)
{
//CHANGED_BY_IVO - INVALID CHANGE, PLEASE REVISE
pDest[pVertex->nDest].v = pBone->TransformVectorOLD(pVertex->pt);
// Temporary fixed by Sergiy. A new operation in the Matrix must be made
//pDest[pVertex->nDest].v = GetTransposed44(*pBone) * (pVertex->pt);
//transformVectorNoTrans (pDest[pVertex->nDest].v, pVertex->pt, *pBone);
#ifdef _DEBUG
assert (arrW[pVertex->nDest] == 0);
arrW[pVertex->nDest] = 1;
#endif
}
// process the smooth1 vertices that were the first time met
pGroupEnd = pVertex + *pAux++;
for (;pVertex < pGroupEnd; ++pVertex, ++pAux)
{
transformWVector (pDest[*pAux].v, *pBone, *pVertex);
#ifdef _DEBUG
assert (arrW[*pAux] == 0);
arrW[*pAux] = pVertex->fWeight;
#endif
}
// process the smooth vertices that were the first time met
pGroupEnd = pVertex + *pAux++;
for (;pVertex < pGroupEnd; ++pVertex, ++pAux)
{
addWVector (pDest[*pAux].v, *pBone, *pVertex);
#ifdef _DEBUG
assert (arrW[*pAux] > 0 && arrW[*pAux] < 1.005f);
arrW[*pAux] += pVertex->fWeight;
assert (arrW[*pAux] > 0 && arrW[*pAux] < 1.005f);
#endif
}
}
#ifdef _DEBUG
for (unsigned i = 0; i < m_numDests; ++i)
assert (arrW[i] > 0.995f && arrW[i] < 1.005f);
#endif
}
}
// Registers one more link to the destination vertex nDest:
// grows the per-destination link counter array when nDest does not fit yet,
// bumps the counter and records nDest in the set of used destinations.
void CrySkinFull::CStatistics::addDest(unsigned nDest)
{
	const unsigned numRequired = nDest + 1;
	if (numRequired > arrNumLinks.size())
		arrNumLinks.resize (numRequired, 0);

	arrNumLinks[nDest] += 1;
	setDests.insert (nDest);
}
void CrySkinFull::CStatistics::initSetDests (const CrySkinFull* pSkin)
{
const CrySkinAuxInt* pAux = &pSkin->m_arrAux[0];
const Vertex* pVertex = &pSkin->m_arrVertices[0];
arrNumLinks.clear();
for (unsigned nBone = pSkin->m_numSkipBones; nBone < pSkin->m_numBones; ++nBone)
{
// each bone has a group of vertices
// first process the rigid vertices
const Vertex* pGroupEnd = pVertex + *pAux++;
for (;pVertex < pGroupEnd; ++pVertex)
{
unsigned nDest = pVertex->nDest;
addDest (nDest);
assert (arrNumLinks[nDest] == 1);
}
// process the smooth1 vertices that were the first time met
pGroupEnd = pVertex + *pAux++;
for (;pVertex < pGroupEnd; ++pVertex, ++pAux)
{
unsigned nDest = *pAux;
addDest (nDest);
assert (arrNumLinks[nDest] == 1);
//pVertex->fWeight is the weight of the vertex
}
// process the smooth vertices that were the second/etc time met
pGroupEnd = pVertex + *pAux++;
for (;pVertex < pGroupEnd; ++pVertex, ++pAux)
{
unsigned nDest = *pAux;
addDest (nDest);
assert (arrNumLinks[nDest] > 1);
// pVertex->fWeight contains the weight of the vertex
}
}
}
//////////////////////////////////////////////////////////////////////////
// validates the skin against the given geom info
//   pGeometry - the source geometry; its per-vertex bone links (getLink)
//               are compared with what is stored in the skin streams
// The function walks the streams bone by bone (rigid group, smooth group met
// for the first time, smooth group met again), counts how many times each
// destination vertex is referenced, and finally checks that the count
// equals the number of links of the corresponding source vertex.
// All bookkeeping is done in plain statements and never inside assert()
// expressions, so the counters stay correct even when assert() expands to nothing.
#if defined (_DEBUG)
void CrySkinFull::validate (const ICrySkinSource* pGeometry)
{
	TElementaryArray<unsigned> arrNumLinks ("CrySkinFull::validate.arrNumLinks");
	arrNumLinks.reinit (pGeometry->numVertices(), 0);
	CrySkinAuxInt* pAux = &m_arrAux[0];
	Vertex* pVertex = &m_arrVertices[0];
	for (unsigned nBone = m_numSkipBones; nBone < m_numBones; ++nBone)
	{
		// each bone has a group of vertices
		// first process the rigid vertices: exactly one link, fully weighted, to this bone
		Vertex* pGroupEnd = pVertex + *pAux++;
		for (;pVertex < pGroupEnd; ++pVertex)
		{
			unsigned nDest = pVertex->nDest;
			const CryVertexBinding& rLink = pGeometry->getLink(nDest);
			assert (arrNumLinks[nDest] == 0);
			arrNumLinks[nDest] = 1;
			assert (rLink.size()==1);
			assert (rLink[0].Blending == 1);
			assert (rLink[0].BoneID == nBone);
		}
		// process the smooth vertices that were met for the first time
		pGroupEnd = pVertex + *pAux++;
		for (;pVertex < pGroupEnd; ++pVertex, ++pAux)
		{
			unsigned nDest = *pAux;
			const CryVertexBinding& rLink = pGeometry->getLink(nDest);
			// the destination must not have been referenced before
			assert (arrNumLinks[nDest] == 0);
			++arrNumLinks[nDest];
			assert (rLink.size()>1);
			float fLegacyWeight = rLink.getBoneWeight(nBone);
			assert (pVertex->fWeight == fLegacyWeight);
		}
		// process the smooth vertices that were already met with a previous bone
		pGroupEnd = pVertex + *pAux++;
		for (;pVertex < pGroupEnd; ++pVertex, ++pAux)
		{
			unsigned nDest = *pAux;
			const CryVertexBinding& rLink = pGeometry->getLink(nDest);
			// the destination must have been referenced already,
			// and must not be referenced more times than it has links
			assert (arrNumLinks[nDest] > 0);
			++arrNumLinks[nDest];
			assert (arrNumLinks[nDest] <= rLink.size());
			assert (rLink.size()>1);
			assert (rLink.hasBoneWeight(nBone,pVertex->fWeight));
		}
	}
	// every source vertex must be referenced once per link
	for (unsigned nVert = 0; nVert < pGeometry->numVertices(); ++nVert)
		assert (arrNumLinks[nVert] == pGeometry->getLink(nVert).size());
}
#endif
// The SSE skinner is compiled only for x86 / AMD64 targets, and not for LINUX.
// (logical && is used to combine the two preprocessor conditions)
#if ( defined (_CPU_X86) || defined (_CPU_AMD64) ) && !defined(LINUX)
// bounding box that skinSSE resets and accumulates while skinning
DEFINE_ALIGNED_DATA( CryBBoxA16, CrySkinFull::g_BBox, 32 ); // align by cache line boundaries
#if defined (_CPU_AMD64)
// externally implemented skinner used by skinSSE on AMD64 instead of the inline assembly;
// it receives the stream cursors, the destination array, the bone range and the address of g_BBox.vMin
extern "C" void Amd64Skinner(CrySkinAuxInt* pAux, CrySkinVertexAligned* pVertex, Vec3dA16* pDest, const Matrix44* pBone, Vec3dA16* pvMin,const Matrix44* pBoneEnd);
#endif
// SSE version of the skinning.
//   pBones - array of global bone matrices; the bones in the range
//            [m_numSkipBones, m_numBones) are processed
//   pDest  - destination array of 16-byte vectors
// Before skinning, g_BBox is reset to an inverted box (min = +1e6, max = -1e6).
// On AMD64 the work is delegated to the external Amd64Skinner routine.
// Otherwise the inline assembly below walks the same per-bone stream layout
// as the C++ skinners: a rigid group, a smooth group met for the first time
// (assigned) and a smooth group met again (accumulated). Each group is
// preceded by its vertex count in the aux stream.
// In the inline assembly only the rigid group updates g_BBox.
// All matrix, vertex and destination accesses use movaps (the aligned SSE move).
void CrySkinFull::skinSSE (const Matrix44* pBones, Vec3dA16* pDest)
{
#ifdef DEFINE_PROFILER_FUNCTION
DEFINE_PROFILER_FUNCTION();
#endif
//PROFILE_FRAME_SELF(PureSkin);
// the range of bones to process and the cursors into the aux and vertex streams
const Matrix44* pBone = pBones + m_numSkipBones, *pBoneEnd = pBones + m_numBones;
CrySkinAuxInt* pAux = &m_arrAux[0];
Vertex* pVertex = &m_arrVertices[0];
// set the bbox to the negative volume to make sure the bbox will calculate starting from the first vertex
g_BBox.vMin.v = Vec3d(1e6,1e6,1e6);// = pBone->GetTranslation();
g_BBox.vMax.v = Vec3d(-1e6,-1e6,-1e6);// = pBone->GetTranslation();
// when FOR_TEST is enabled, the statement that follows (the Amd64Skinner call
// or the _asm block) becomes the body of this repeat loop
#if FOR_TEST
for (int i = 0; i < g_GetCVars()->ca_TestSkinningRepeats(); ++i)
#endif
#if defined(_CPU_AMD64)
Amd64Skinner(pAux, pVertex, pDest, pBone, &g_BBox.vMin, pBoneEnd);
#else
_asm
{
// register roles for the whole block:
//   EDX = cursor in the aux stream, EBX = cursor in the vertex stream (16 bytes per vertex),
//   EDI = base of the destination array (16 bytes per element), ESI = current bone matrix,
//   ECX = number of vertices left in the current group
mov EDX, pAux
mov EBX, pVertex
mov EDI, pDest
mov ESI, pBone
// load the current matrix into xmm0..xmm3 (four consecutive 16-byte parts) and
// advance ESI by 0x40 to the next matrix.
// NOTE(review): the original comment here said "we don't need the move component",
// but xmm3 is loaded and added to every transformed vertex below.
startLoop:
cmp ESI, pBoneEnd
jz endLoop
movaps xmm0, [ESI]
movaps xmm1, [ESI+0x10]
movaps xmm2, [ESI+0x20]
movaps xmm3, [ESI+0x30]
add ESI, 0x40
//////////////////////////////////////////////////////////
// Rigid loop
// load the counter for the number of rigid vertices for this bone
// (the original comment mentioned "non-flipped tangents", apparently left over from another routine)
//////////////////////////////////////////////////////////
#if CRY_SKIN_AUX_INT_SIZE==2
xor ECX,ECX
mov CX, word ptr [EDX]
add EDX, 2
#else
mov ECX, dword ptr [EDX]
add EDX, 4
#endif
// skip the group if it is empty: the loop instruction below decrements ECX before testing it
test ECX, ECX
jz endLoopRigid
startLoopRigid:
// load the offset
movaps xmm7, [EBX]
// calculate the destination pointer:
// the destination index is the low 24 bits of the dword at offset 0xC of the vertex;
// it is doubled here and scaled by 8 in the addressing below, i.e. 16 bytes per destination
mov EAX, [EBX+0xC]
and EAX, 0xFFFFFF
add EAX, EAX
// EDI+EAX*8 points to the destination vector now
add EBX, 0x10
// transform the vertex: x*xmm0 + y*xmm1 + z*xmm2 + xmm3
movss xmm6, xmm7
shufps xmm6, xmm6, 0 // xmm6 = 4 copies of offset.x
mulps xmm6, xmm0
movaps xmm5, xmm7
shufps xmm5, xmm5, 0x55 // xmm5 = 4 copies of offset.y
mulps xmm5, xmm1
shufps xmm7, xmm7, 0xAA // xmm7 = 4 copies of offset.z
mulps xmm7, xmm2
addps xmm7, xmm5
addps xmm7, xmm6
addps xmm7, xmm3 // xmm7 = fully transformed vertex, store it
// xmm7 = transformed vertex
movaps [EDI+EAX*8], xmm7
//----------------------
// Calculation of BBox
// xmm5 will be the min, xmm6 will be the max of bbox
// (the running min/max are read from and written back to g_BBox for every rigid vertex)
movaps xmm5, xmm7
movaps xmm6, xmm7
minps xmm5, g_BBox.vMin
maxps xmm6, g_BBox.vMax
movaps g_BBox.vMin, xmm5
movaps g_BBox.vMax, xmm6
loop startLoopRigid
endLoopRigid:
//////////////////////////////////////////////////////////
// Smooth-1 loop
// load the counter for the number of smooth vertices met for the first time
//////////////////////////////////////////////////////////
#if CRY_SKIN_AUX_INT_SIZE==2
xor ECX,ECX
mov CX, word ptr [EDX]
add EDX, 2
#else
mov ECX, dword ptr [EDX]
add EDX, 4
#endif
// skip the group if it is empty
test ECX, ECX
jz endLoopSmooth1
startLoopSmooth1:
// load the offset & blending
movaps xmm7, [EBX]
// calculate the destination pointer:
// here the destination index is read from the aux stream, one entry per vertex
#if CRY_SKIN_AUX_INT_SIZE==2
xor EAX,EAX
mov AX, word ptr [EDX]
add EDX, 2
#else
mov EAX, dword ptr [EDX]
add EDX, 4
#endif
add EAX, EAX
// EDI+EAX*8 points to the destination vector now
add EBX, 0x10
// transform the vertex
movss xmm6, xmm7
shufps xmm6, xmm6, 0 // xmm6 = 4 copies of offset.x
mulps xmm6, xmm0
movaps xmm5, xmm7
shufps xmm5, xmm5, 0x55 // xmm5 = 4 copies of offset.y
mulps xmm5, xmm1
movaps xmm4, xmm7
shufps xmm4, xmm4, 0xAA // xmm4 = 4 copies of offset.z
mulps xmm4, xmm2
addps xmm4, xmm5
addps xmm4, xmm6
addps xmm4, xmm3 // xmm4 = fully transformed vertex, blend it
shufps xmm7, xmm7, 0xFF // xmm7 = 4 copies of blending
mulps xmm7, xmm4
// xmm7 = transformed and blended vertex
// first contribution to this destination: the value is assigned, not accumulated
movaps [EDI+EAX*8], xmm7
loop startLoopSmooth1
//loop startLoopNonflipped
endLoopSmooth1:
//////////////////////////////////////////////////////////
// Smooth-2 loop
// load the counter for the number of smooth vertices met for the second time
//////////////////////////////////////////////////////////
#if CRY_SKIN_AUX_INT_SIZE==2
xor ECX,ECX
mov CX, word ptr [EDX]
add EDX, 2
#else
mov ECX, dword ptr [EDX]
add EDX, 4
#endif
// skip the group if it is empty
test ECX, ECX
jz endLoopSmooth2
startLoopSmooth2:
// load the offset & blending
movaps xmm7, [EBX]
// calculate the destination pointer:
// the destination index comes from the aux stream and is converted
// to an absolute address (index*16 + EDI)
#if CRY_SKIN_AUX_INT_SIZE==2
xor EAX,EAX
mov AX, word ptr [EDX]
add EDX, 2
#else
mov EAX, dword ptr [EDX]
add EDX, 4
#endif
shl EAX, 4
add EAX, EDI
// EAX points to the destination vector now
add EBX, 0x10
// transform the vertex
movss xmm6, xmm7
shufps xmm6, xmm6, 0 // xmm6 = 4 copies of offset.x
mulps xmm6, xmm0
movaps xmm5, xmm7
shufps xmm5, xmm5, 0x55 // xmm5 = 4 copies of offset.y
mulps xmm5, xmm1
movaps xmm4, xmm7
shufps xmm4, xmm4, 0xAA // xmm4 = 4 copies of offset.z
mulps xmm4, xmm2
addps xmm4, xmm5
addps xmm4, xmm6
addps xmm4, xmm3 // xmm4 = fully transformed vertex, blend it
shufps xmm7, xmm7, 0xFF // xmm7 = 4 copies of blending
mulps xmm7, xmm4
// xmm7 = transformed and blended vertex
// subsequent contribution: add to what is already stored at the destination
addps xmm7, [EAX]
movaps [EAX], xmm7
loop startLoopSmooth2
//loop startLoopNonflipped
endLoopSmooth2:
// proceed to the next bone
jmp startLoop
endLoop:
}
#endif // _CPU_AMD64
}
#endif