/*=============================================================================
|
|
RenderPC.cpp: Cry Render support precompiled header generator.
|
|
Copyright 2001 Crytek Studios. All Rights Reserved.
|
|
|
|
Revision history:
|
|
* Created by Honitch Andrey
|
|
|
|
=============================================================================*/
|
|
|
|
#define CRY_API
|
|
|
|
#ifdef _DEBUG
|
|
#define CRTDBG_MAP_ALLOC
|
|
#endif //_DEBUG
|
|
|
|
//! Include standard headers.
|
|
#include <assert.h>
|
|
|
|
//#define PS2
|
|
//#define OPENGL
|
|
|
|
|
|
#ifdef _XBOX
|
|
|
|
//! Include standard headers.
|
|
#include <assert.h>
|
|
#include <math.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
#include <memory.h>
|
|
#include <io.h>
|
|
#include <memory.h>
|
|
#include <time.h>
|
|
#include <direct.h>
|
|
#include <search.h>
|
|
#include <stdarg.h>
|
|
|
|
typedef unsigned long DWORD;
|
|
typedef unsigned short WORD;
|
|
typedef unsigned char BYTE;
|
|
|
|
#include <xtl.h>
|
|
|
|
#else
|
|
|
|
#include <windows.h>
|
|
|
|
#endif
|
|
|
|
#include <platform.h>
|
|
|
|
// enable memory pool usage
|
|
#define USE_NEWPOOL
|
|
#include <CryMemoryManager.h>
|
|
|
|
#include "CrtOverrides.h"
|
|
|
|
#if defined _DEBUG && defined OPENGL
|
|
#define DEBUGALLOC
|
|
#endif
|
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// STL //////////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
#include <vector>
|
|
#include <list>
|
|
#include <map>
|
|
#include <hash_map>
|
|
#include <set>
|
|
#include <string>
|
|
#include <algorithm>
|
|
|
|
typedef const char* cstr;
|
|
|
|
#define SIZEOF_ARRAY(arr) (sizeof(arr)/sizeof((arr)[0]))
|
|
|
|
// Include common headers.
|
|
//#include "Common\CryHelpers.h"
|
|
|
|
//typedef string String;
|
|
|
|
#ifdef DEBUGALLOC
|
|
|
|
#include <crtdbg.h>
|
|
#define DEBUG_CLIENTBLOCK new( _NORMAL_BLOCK, __FILE__, __LINE__)
|
|
#define new DEBUG_CLIENTBLOCK
|
|
|
|
// memman
|
|
#define calloc(s,t) _calloc_dbg(s, t, _NORMAL_BLOCK, __FILE__, __LINE__)
|
|
#define malloc(s) _malloc_dbg(s, _NORMAL_BLOCK, __FILE__, __LINE__)
|
|
#define realloc(p, s) _realloc_dbg(p, s, _NORMAL_BLOCK, __FILE__, __LINE__)
|
|
|
|
#endif
|
|
|
|
|
|
#include <list2.h>
|
|
#include <Names.h>
|
|
|
|
#define MAX_TMU 8
|
|
|
|
//! Include main interfaces.
|
|
#include <ICryPak.h>
|
|
#include <IEntitySystem.h>
|
|
#include <IProcess.h>
|
|
#include <ITimer.h>
|
|
#include <ISystem.h>
|
|
#include <ILog.h>
|
|
#include <IPhysics.h>
|
|
#include <IConsole.h>
|
|
#include <IRenderer.h>
|
|
#include <IStreamEngine.h>
|
|
#include <CrySizer.h>
|
|
|
|
#include "Font.h"
|
|
#include "Except.h"
|
|
|
|
#include <Cry_Math.h>
|
|
#include "Cry_Camera.h"
|
|
//#include "_Malloc.h"
|
|
#include "math.h"
|
|
#include "Common/Mkl/Mkl.h"
|
|
|
|
#include <VertexFormats.h>
|
|
#include <CREPolyMesh.h>
|
|
|
|
#include "Common/Shaders/Shader.h"
|
|
//#include "Common/XFile/File.h"
|
|
//#include "Common/Image.h"
|
|
#include "Common/Shaders/CShader.h"
|
|
#include "Common/EvalFuncs.h"
|
|
#include "Common/RenderPipeline.h"
|
|
#include "Common/Renderer.h"
|
|
#include "Common/CPUDetect.h"
|
|
#include "Common/Textures/TexMan.h"
|
|
#include "Common/Shaders/Parser.h"
|
|
#include "Common/SimpleFrameProfiler.h"
|
|
|
|
// per-frame profilers: collect the information for each frame for
// displaying statistics at the beginning of each frame
|
|
#define PROFILER(ID,NAME) DECLARE_FRAME_PROFILER(ID,NAME)
|
|
#include "Common/FrameProfilers-list.h"
|
|
#undef PROFILER
|
|
|
|
// All handled render elements (except common ones included in "RendElement.h")
|
|
#include "Common/RendElements/CREBeam.h"
|
|
#include "Common/RendElements/CREPrefabGeom.h"
|
|
#include "Common/RendElements/CREClientPoly.h"
|
|
#include "Common/RendElements/CREClientPoly2D.h"
|
|
#include "Common/RendElements/CREParticleSpray.h"
|
|
#include "Common/RendElements/CREFlares.h"
|
|
#include "Common/RendElements/CREPolyBlend.h"
|
|
#include "Common/RendElements/CRESkyZone.h"
|
|
#include "Common/RendElements/CREOcean.h"
|
|
#include "Common/RendElements/CREGlare.h"
|
|
#include "Common/RendElements/CRETempMesh.h"
|
|
|
|
|
|
#define max(a,b) (((a) > (b)) ? (a) : (b))
|
|
#define min(a,b) (((a) < (b)) ? (a) : (b))
|
|
|
|
/*-----------------------------------------------------------------------------
|
|
Vector transformations.
|
|
-----------------------------------------------------------------------------*/
|
|
|
|
//
|
|
// Transformations in optimized assembler format.
|
|
// An adaption of Michael Abrash' optimal transformation code.
|
|
//
|
|
#if DO_ASM
|
|
_inline void ASMTransformPoint(const SCoord &Coords, const Vec3d& InVector, Vec3d& OutVector)
{
	// Transforms InVector into the coordinate frame Coords:
	//   Temp       = InVector - Coords.Origin
	//   OutVector[i] = Temp dot Coords.Axis[i]
	// Hand-scheduled x87 FPU code (an adaption of Michael Abrash's optimal
	// transformation code); MSVC x86 inline assembly only.
	//
	// SCoords is a structure of 4 vectors: Origin, X, Y, Z
	//                   x  y  z
	// Vector  Origin;   0  4  8
	// Vector  XAxis;   12 16 20
	// Vector  YAxis;   24 28 32
	// Vector  ZAxis;   36 40 44
	//
	// task:  VectorSubtract(InVector, Coords.org, Temp);
	//        Outvector[0] = DotProduct(Temp, Coords.rot[0]);
	//        Outvector[1] = DotProduct(Temp, Coords.rot[1]);
	//        Outvector[2] = DotProduct(Temp, Coords.rot[2]);
	//
	// About 33 cycles on a Pentium.
	//
	__asm
	{
		mov     esi,[InVector]
		mov     edx,[Coords]
		mov     edi,[OutVector]

		// get source
		fld     dword ptr [esi+0]
		fld     dword ptr [esi+4]
		fld     dword ptr [esi+8] // z y x
		fxch    st(2)             // xyz

		// subtract origin
		fsub    dword ptr [edx + 0]  // xyz
		fxch    st(1)
		fsub    dword ptr [edx + 4]  // yxz
		fxch    st(2)
		fsub    dword ptr [edx + 8]  // zxy
		fxch    st(1)                // X Z Y

		// triplicate X for transforming
		fld     st(0)             // X X   Z Y
		fmul    dword ptr [edx+12] // Xx X  Z Y
		fld     st(1)             // X Xx X   Z Y
		fmul    dword ptr [edx+24] // Xy Xx X  Z Y
		fxch    st(2)
		fmul    dword ptr [edx+36] // Xz Xx Xy Z Y
		fxch    st(4)             // Y  Xx Xy Z Xz

		fld     st(0)             // Y Y Xx Xy Z Xz
		fmul    dword ptr [edx+16]
		fld     st(1)             // Y Yx Y Xx Xy Z Xz
		fmul    dword ptr [edx+28]
		fxch    st(2)             // Y Yx Yy Xx Xy Z Xz
		fmul    dword ptr [edx+40] // Yz Yx Yy Xx Xy Z Xz
		fxch    st(1)             // Yx Yz Yy Xx Xy Z Xz

		faddp   st(3),st(0)       // Yz Yy XxYx Xy Z Xz
		faddp   st(5),st(0)       // Yy XxYx Xy Z XzYz
		faddp   st(2),st(0)       // XxYx XyYy Z XzYz
		fxch    st(2)             // Z XyYy XxYx XzYz

		fld     st(0)             // Z Z XyYy XxYx XzYz
		fmul    dword ptr [edx+20]
		fld     st(1)             // Z Zx Z XyYy XxYx XzYz
		fmul    dword ptr [edx+32]
		fxch    st(2)             // Z Zx Zy
		fmul    dword ptr [edx+44] // Zz Zx Zy XyYy XxYx XzYz
		fxch    st(1)             // Zx Zz Zy XyYy XxYx XzYz

		faddp   st(4),st(0)       // Zz Zy XyYy XxYxZx XzYz
		faddp   st(4),st(0)       // Zy XyYy XxYxZx XzYzZz
		faddp   st(1),st(0)       // XyYyZy XxYxZx XzYzZz

		// note the store order compensates for the stack order above
		fstp    dword ptr [edi+4]
		fstp    dword ptr [edi+0]
		fstp    dword ptr [edi+8]
	}
}
|
|
#endif
|
|
|
|
#if DO_ASM
|
|
_inline void ASMTransformVector(const SCoord &Coords, const Vec3d& InVector, Vec3d& OutVector)
{
	// Rotates InVector by the axis vectors of Coords, WITHOUT subtracting
	// the origin (directional-vector variant of ASMTransformPoint):
	//   OutVector[i] = InVector dot Coords.m_Vecs[i]
	// Same SCoord byte-offset layout as documented in ASMTransformPoint
	// (origin at 0, axes at offsets 12/24/36). MSVC x86 inline assembly.
	__asm
	{
		mov     esi,[InVector]
		mov     edx,[Coords]
		mov     edi,[OutVector]

		// get source
		fld     dword ptr [esi+0]
		fld     dword ptr [esi+4]
		fxch    st(1)
		fld     dword ptr [esi+8] // z x y
		fxch    st(1)             // x z y

		// triplicate X for transforming
		fld     st(0)             // X X   Z Y
		fmul    dword ptr [edx+12] // Xx X  Z Y
		fld     st(1)             // X Xx X   Z Y
		fmul    dword ptr [edx+24] // Xy Xx X  Z Y
		fxch    st(2)
		fmul    dword ptr [edx+36] // Xz Xx Xy Z Y
		fxch    st(4)             // Y  Xx Xy Z Xz

		fld     st(0)             // Y Y Xx Xy Z Xz
		fmul    dword ptr [edx+16]
		fld     st(1)             // Y Yx Y Xx Xy Z Xz
		fmul    dword ptr [edx+28]
		fxch    st(2)             // Y Yx Yy Xx Xy Z Xz
		fmul    dword ptr [edx+40] // Yz Yx Yy Xx Xy Z Xz
		fxch    st(1)             // Yx Yz Yy Xx Xy Z Xz

		faddp   st(3),st(0)       // Yz Yy XxYx Xy Z Xz
		faddp   st(5),st(0)       // Yy XxYx Xy Z XzYz
		faddp   st(2),st(0)       // XxYx XyYy Z XzYz
		fxch    st(2)             // Z XyYy XxYx XzYz

		fld     st(0)             // Z Z XyYy XxYx XzYz
		fmul    dword ptr [edx+20]
		fld     st(1)             // Z Zx Z XyYy XxYx XzYz
		fmul    dword ptr [edx+32]
		fxch    st(2)             // Z Zx Zy
		fmul    dword ptr [edx+44] // Zz Zx Zy XyYy XxYx XzYz
		fxch    st(1)             // Zx Zz Zy XyYy XxYx XzYz

		faddp   st(4),st(0)       // Zz Zy XyYy XxYxZx XzYz
		faddp   st(4),st(0)       // Zy XyYy XxYxZx XzYzZz
		faddp   st(1),st(0)       // XyYyZy XxYxZx XzYzZz

		// note the store order compensates for the stack order above
		fstp    dword ptr [edi+4]
		fstp    dword ptr [edi+0]
		fstp    dword ptr [edi+8]
	}
}
|
|
#endif
|
|
|
|
|
|
//
|
|
// Transform a point by a coordinate system, moving
|
|
// it by the coordinate system's origin if nonzero.
|
|
//
|
|
_inline void TransformPoint( const SCoord &Coords, Vec3d& in, Vec3d& out)
{
	// Moves 'in' into the frame of Coords: subtract the frame origin,
	// then project the remainder onto each frame axis.
#if !DO_ASM
	Vec3d rel = in - Coords.m_Org;

	out[0] = rel | Coords.m_Vecs[0];
	out[1] = rel | Coords.m_Vecs[1];
	out[2] = rel | Coords.m_Vecs[2];
#else
	ASMTransformPoint( Coords, in, out);
#endif
}
|
|
|
|
//we need a better function-name for this exotic operation
|
|
//there already is a "TransformPoint" in Cry_Matrix.h
|
|
//we need a better function-name for this exotic operation
//there already is a "TransformPoint" in Cry_Matrix.h
_inline void TransformPoint( const Matrix44 &Matr, Vec3d& inp, Vec3d& outp)
{
	// Subtract the matrix translation, then project the remainder onto the
	// matrix basis vectors (dot with each "Ort" axis).
	const Vec3d rel = inp - Matr.GetTranslation();

	outp.x = rel | Matr.GetOrtX();
	outp.y = rel | Matr.GetOrtY();
	outp.z = rel | Matr.GetOrtZ();
}
|
|
|
|
|
|
//
|
|
// Transform a directional vector by a coordinate system.
|
|
// Ignores the coordinate system's origin.
|
|
//
|
|
//
// Transform a directional vector by a coordinate system.
// Ignores the coordinate system's origin.
//
// FIX: Coords was passed by value, copying the whole SCoord struct
// (four vectors) on every call; pass by const reference instead.
// Backward compatible for all callers (binds to lvalues and temporaries).
_inline void TransformVector( const SCoord &Coords, Vec3d& in, Vec3d& out )
{
#if !DO_ASM
	// Copy the input first so the routine stays correct when the caller
	// passes the same vector for 'in' and 'out'.
	Vec3d Temp = in;

	out[0] = Temp | Coords.m_Vecs[0];
	out[1] = Temp | Coords.m_Vecs[1];
	out[2] = Temp | Coords.m_Vecs[2];
#else
	ASMTransformVector( Coords, in, out);
#endif
}
|
|
|
|
_inline void TransformVec_ViewProj(Vec3d& v, Matrix44 viewmatr, Matrix44 projmatr, vec4_t vv, vec4_t pv)
{
	// Runs point v through the view matrix and then the projection matrix.
	// vv receives the view-space homogeneous point, pv the clip-space one.
	// The input point's w component is implicitly 1 (the viewmatr[3][k]
	// term is added without a multiplier).
	for (int k = 0; k < 4; k++)
		vv[k] = viewmatr[0][k]*v[0] + viewmatr[1][k]*v[1] + viewmatr[2][k]*v[2] + viewmatr[3][k];

	// Full 4-component multiply for the projection stage.
	for (int k = 0; k < 4; k++)
		pv[k] = projmatr[0][k]*vv[0] + projmatr[1][k]*vv[1] + projmatr[2][k]*vv[2] + projmatr[3][k]*vv[3];
}
|
|
|
|
_inline void ProjectPoint(vec4_t pv, Vec3d& v3d, vec2_t v2d)
{
	// Converts a clip-space homogeneous point pv into normalized device
	// coordinates (v3d) and window pixel coordinates (v2d).
	// NOTE(review): the z mapping below remaps depth into [0,1]-style
	// range via (z+w)/(z+2w) -- engine-specific; confirm against the
	// depth convention used by the rasterizer.
	v3d[0] = pv[0] / pv[3];
	v3d[1] = pv[1] / pv[3];
	v3d[2] = (pv[2] + pv[3]) / (pv[2] + pv[3] + pv[3]);

	// Map NDC x/y from [-1,1] to pixel coordinates.
	v2d[0] = (float)QRound((v3d[0] + 1) * gRenDev->GetWidth() * 0.5f);
	v2d[1] = (float)QRound((v3d[1] + 1) * gRenDev->GetHeight() * 0.5f);
	// FIX: the old code rounded v2d a second time; QRound of an already
	// integer-valued float is a no-op, so the redundant pass was removed.
}
|
|
|
|
_inline void TransformVector(Vec3d& out, Vec3d& in, Matrix44& m)
{
	// Rotates 'in' by the upper-left 3x3 of m (no translation applied).
	// FIX: cache the input components before writing the output. The old
	// code wrote out.x and then kept reading in.x/in.y/in.z, producing
	// wrong results when a caller passes the same vector as 'in' and
	// 'out'. With the cached locals, aliasing is safe.
	const float x = in.x, y = in.y, z = in.z;

	out.x = x * m(0,0) + y * m(1,0) + z * m(2,0);
	out.y = x * m(0,1) + y * m(1,1) + z * m(2,1);
	out.z = x * m(0,2) + y * m(1,2) + z * m(2,2);
}
|
|
|
|
_inline void TransformPosition(Vec3d& out, Vec3d& in, Matrix44& m)
{
	// Affine point transform: rotate 'in' by the 3x3 part of m (via
	// TransformVector above), then add the matrix translation.
	TransformVector (out, in, m);
	out += m.GetTranslation();
}
|
|
|
|
|
|
inline Plane TransformPlaneByUsingAdjointT( const Matrix44& M, const Matrix44& TA, const Plane plSrc)
{
	// Transforms a plane by M, using the caller-supplied transposed
	// adjoint TA for the normal (correct even under non-uniform scale,
	// where transforming the normal with M itself would be wrong).
	// The normal is re-normalized and flipped when M mirrors (negative
	// determinant); the distance is recovered by transforming a point on
	// the source plane and dotting it with the new normal.
	// NOTE(review): assumes plSrc.n * plSrc.d lies on the plane, i.e.
	// planes satisfy (p | n) == d -- confirm against Plane in Cry_Math.
	//CHANGED_BY_IVO
	//Vec3d newNorm = TA.TransformVector(plSrc.n);
	Vec3d newNorm = GetTransposed44(TA)*(plSrc.n);

	newNorm.Normalize();

	// mirroring transform: the winding flips, so the normal must too
	if(M.Determinant() < 0.f) newNorm *= -1;

	Plane plane;
	plane.Set(newNorm, M.TransformPointOLD(plSrc.n * plSrc.d) | newNorm);

	return plane;
}
|
|
|
|
inline Matrix44 TransposeAdjoint(const Matrix44& M)
{
	// Returns the transposed adjoint (cofactor matrix) of the upper-left
	// 3x3 of M, padded to 4x4 with an identity last row/column. Used by
	// TransformPlane to transform plane normals without a full inverse.
	Matrix44 ta;

	ta(0,0) = M(1,1) * M(2,2) - M(1,2) * M(2,1);
	ta(0,1) = M(1,2) * M(2,0) - M(1,0) * M(2,2);
	ta(0,2) = M(1,0) * M(2,1) - M(1,1) * M(2,0);
	ta(0,3) = 0.f;

	ta(1,0) = M(2,1) * M(0,2) - M(2,2) * M(0,1);
	ta(1,1) = M(2,2) * M(0,0) - M(2,0) * M(0,2);
	ta(1,2) = M(2,0) * M(0,1) - M(2,1) * M(0,0);
	ta(1,3) = 0.f;

	ta(2,0) = M(0,1) * M(1,2) - M(0,2) * M(1,1);
	ta(2,1) = M(0,2) * M(1,0) - M(0,0) * M(1,2);
	ta(2,2) = M(0,0) * M(1,1) - M(0,1) * M(1,0);
	ta(2,3) = 0.f;

	ta(3,0) = 0.f;
	ta(3,1) = 0.f;
	ta(3,2) = 0.f;
	// BUGFIX: was "ta(3,1) = 1.f;", which overwrote the zero already
	// stored at (3,1) and left the homogeneous element (3,3) unset.
	ta(3,3) = 1.f;

	return ta;
}
|
|
|
|
inline Plane TransformPlane( const Matrix44& M, const Plane& plSrc)
{
	// Convenience wrapper: build the transposed adjoint of M and delegate
	// the actual work to TransformPlaneByUsingAdjointT.
	const Matrix44 adjT = TransposeAdjoint(M);
	return TransformPlaneByUsingAdjointT(M, adjT, plSrc);
}
|
|
|
|
// Homogeneous plane transform.
|
|
// Homogeneous plane transform.
inline Plane TransformPlane2(const Matrix44& m, const Plane& src )
{
	// Treats the plane as the homogeneous 4-vector (nx, ny, nz, d) and
	// multiplies it by the rows of m.
	const float nx = src.n.x, ny = src.n.y, nz = src.n.z, d = src.d;

	Plane res;
	res.n.x = nx * m[0][0] + ny * m[0][1] + nz * m[0][2] + d * m[0][3];
	res.n.y = nx * m[1][0] + ny * m[1][1] + nz * m[1][2] + d * m[1][3];
	res.n.z = nx * m[2][0] + ny * m[2][1] + nz * m[2][2] + d * m[2][3];
	res.d   = nx * m[3][0] + ny * m[3][1] + nz * m[3][2] + d * m[3][3];

	return res;
}
|
|
inline Plane TransformPlane2_NoTrans(const Matrix44& m, const Plane& src )
{
	// Rotation-only plane transform: the normal goes through the 3x3 part
	// of m, the distance is carried over unchanged.
	const float nx = src.n.x, ny = src.n.y, nz = src.n.z;

	Plane res;
	res.n.x = nx * m[0][0] + ny * m[0][1] + nz * m[0][2];
	res.n.y = nx * m[1][0] + ny * m[1][1] + nz * m[1][2];
	res.n.z = nx * m[2][0] + ny * m[2][1] + nz * m[2][2];
	res.d   = src.d;

	return res;
}
|
|
|
|
inline Plane TransformPlane2Transposed(const Matrix44& m, const Plane& src )
{
	// Homogeneous plane transform by the TRANSPOSE of m: the plane
	// 4-vector (nx, ny, nz, d) is multiplied by the columns of m.
	Plane plDst;

	float v0=src.n.x, v1=src.n.y, v2=src.n.z, v3=src.d;
	plDst.n.x = v0 * m[0][0] + v1 * m[1][0] + v2 * m[2][0] + v3 * m[3][0];
	plDst.n.y = v0 * m[0][1] + v1 * m[1][1] + v2 * m[2][1] + v3 * m[3][1];
	// BUGFIX: the v1 term read m[2][1] here, breaking the transposed
	// column pattern (row 1, column 2); it must be m[1][2].
	plDst.n.z = v0 * m[0][2] + v1 * m[1][2] + v2 * m[2][2] + v3 * m[3][2];

	plDst.d   = v0 * m[0][3] + v1 * m[1][3] + v2 * m[2][3] + v3 * m[3][3];

	return plDst;
}
|
|
|
|
//===============================================================================================
|
|
|
|
_inline int CullBoxByPlane (float *Mins, float *Maxs, SPlane *p)
{
	// Classifies an axis-aligned box (Mins/Maxs: 3 floats each) against
	// the plane p. Returns a bit mask: 1 and 2 mark the two sides of the
	// plane the box reaches; 3 means the box spans the plane.
	float dist1, dist2;
	int sides;

	// fast axial cases: for an axis-aligned plane (m_Type 0..2 names the
	// axis), compare the plane distance against the box extents directly
	if (p->m_Type < 3)
	{
		return (p->m_Dist <= Mins[p->m_Type]) ? 1 : (p->m_Dist >= Maxs[p->m_Type]) ? 2 : 3;
	}

	// general case: m_SignBits encodes the sign of each normal component
	// and thereby selects the box corner farthest along the normal
	// (dist1) and the one nearest (dist2)
	switch (p->m_SignBits)
	{
	case 0:
		dist1 = p->m_Normal[0]*Maxs[0] + p->m_Normal[1]*Maxs[1] + p->m_Normal[2]*Maxs[2];
		dist2 = p->m_Normal[0]*Mins[0] + p->m_Normal[1]*Mins[1] + p->m_Normal[2]*Mins[2];
		break;

	case 1:
		dist1 = p->m_Normal[0]*Mins[0] + p->m_Normal[1]*Maxs[1] + p->m_Normal[2]*Maxs[2];
		dist2 = p->m_Normal[0]*Maxs[0] + p->m_Normal[1]*Mins[1] + p->m_Normal[2]*Mins[2];
		break;

	case 2:
		dist1 = p->m_Normal[0]*Maxs[0] + p->m_Normal[1]*Mins[1] + p->m_Normal[2]*Maxs[2];
		dist2 = p->m_Normal[0]*Mins[0] + p->m_Normal[1]*Maxs[1] + p->m_Normal[2]*Mins[2];
		break;

	case 3:
		dist1 = p->m_Normal[0]*Mins[0] + p->m_Normal[1]*Mins[1] + p->m_Normal[2]*Maxs[2];
		dist2 = p->m_Normal[0]*Maxs[0] + p->m_Normal[1]*Maxs[1] + p->m_Normal[2]*Mins[2];
		break;

	case 4:
		dist1 = p->m_Normal[0]*Maxs[0] + p->m_Normal[1]*Maxs[1] + p->m_Normal[2]*Mins[2];
		dist2 = p->m_Normal[0]*Mins[0] + p->m_Normal[1]*Mins[1] + p->m_Normal[2]*Maxs[2];
		break;

	case 5:
		dist1 = p->m_Normal[0]*Mins[0] + p->m_Normal[1]*Maxs[1] + p->m_Normal[2]*Mins[2];
		dist2 = p->m_Normal[0]*Maxs[0] + p->m_Normal[1]*Mins[1] + p->m_Normal[2]*Maxs[2];
		break;

	case 6:
		dist1 = p->m_Normal[0]*Maxs[0] + p->m_Normal[1]*Mins[1] + p->m_Normal[2]*Mins[2];
		dist2 = p->m_Normal[0]*Mins[0] + p->m_Normal[1]*Maxs[1] + p->m_Normal[2]*Maxs[2];
		break;

	case 7:
		dist1 = p->m_Normal[0]*Mins[0] + p->m_Normal[1]*Mins[1] + p->m_Normal[2]*Mins[2];
		dist2 = p->m_Normal[0]*Maxs[0] + p->m_Normal[1]*Maxs[1] + p->m_Normal[2]*Maxs[2];
		break;

	default:
		dist1 = dist2 = 0; // shut up compiler
		// BUGFIX: this branch is unreachable for valid m_SignBits (0..7);
		// the old ASSERT(1) could never fire, so assert unconditionally.
		ASSERT( 0 );
		break;
	}

	sides = 0;
	if (dist1 >= p->m_Dist)
		sides = 1;
	if (dist2 < p->m_Dist)
		sides |= 2;

	//ASSERT( sides != 0 );

	return sides;
}
|
|
|
|
//===============================================================================================
|
|
|
|
// Interfaces from the Game
|
|
extern ILog *iLog;
|
|
extern IConsole *iConsole;
|
|
extern ITimer *iTimer;
|
|
extern ISystem *iSystem;
|
|
extern int *pTest_int;
|
|
extern IPhysicalWorld *pIPhysicalWorld;
|
|
|
|
#define MAX_PATH_LENGTH 512
|
|
|
|
inline void _text_to_log(char * format, ...)
|
|
{
|
|
char buffer[MAX_PATH_LENGTH];
|
|
va_list args;
|
|
va_start(args, format);
|
|
vsprintf(buffer, format, args);
|
|
va_end(args);
|
|
|
|
iLog->Log(buffer);
|
|
if (gRenDev->CV_r_log == 3)
|
|
gRenDev->Logv(SRendItem::m_RecurseLevel, buffer);
|
|
}
|
|
|
|
inline void _text_to_logPlus(char * format, ...)
|
|
{
|
|
char buffer[MAX_PATH_LENGTH];
|
|
va_list args;
|
|
va_start(args, format);
|
|
vsprintf(buffer, format, args);
|
|
va_end(args);
|
|
|
|
iLog->LogPlus(buffer);
|
|
if (gRenDev->CV_r_log == 3)
|
|
gRenDev->Logv(SRendItem::m_RecurseLevel, buffer);
|
|
}
|
|
|
|
inline void _UpdateLoadingScreen(const char * format, ...)
|
|
{
|
|
if(format)
|
|
{
|
|
char buffer[MAX_PATH_LENGTH];
|
|
va_list args;
|
|
va_start(args, format);
|
|
vsprintf(buffer, format, args);
|
|
va_end(args);
|
|
|
|
iLog->Log(buffer);
|
|
if (gRenDev->CV_r_log == 3)
|
|
gRenDev->Logv(SRendItem::m_RecurseLevel, buffer);
|
|
}
|
|
|
|
//iConsole->Update();
|
|
//gRenDev->BeginFrame();
|
|
//iConsole->Draw();
|
|
//gRenDev->Update();
|
|
}
|
|
|
|
inline void _UpdateLoadingScreenPlus(const char * format, ...)
|
|
{
|
|
if(format)
|
|
{
|
|
char buffer[MAX_PATH_LENGTH];
|
|
va_list args;
|
|
va_start(args, format);
|
|
vsprintf(buffer, format, args);
|
|
va_end(args);
|
|
|
|
iLog->Log(buffer);
|
|
if (gRenDev->CV_r_log == 3)
|
|
gRenDev->Logv(SRendItem::m_RecurseLevel, buffer);
|
|
}
|
|
|
|
iConsole->Update();
|
|
gRenDev->BeginFrame();
|
|
iConsole->Draw();
|
|
gRenDev->Update();
|
|
}
|
|
|
|
_inline char * Cry_strdup(const char * str)
{
	// Duplicates a C string into a freshly malloc'ed buffer.
	// Returns NULL for a NULL input or when the allocation fails.
	// The caller owns the returned memory and must free() it.
	if (!str)
		return NULL;

	// Cache the length once and copy with memcpy: strcpy would walk the
	// string a second time looking for the terminator.
	size_t len = strlen(str) + 1;
	char *copy = (char *)malloc(len);
	if (!copy)
		return NULL;

	memcpy(copy, str, len);
	return copy;
}
|
|
|
|
const char* GetExtension (const char *in);
|
|
void StripExtension (const char *in, char *out);
|
|
void AddExtension (char *path, char *extension);
|
|
void ConvertDOSToUnixName( char *dst, const char *src );
|
|
void ConvertUnixToDosName( char *dst, const char *src );
|
|
void UsePath (char *name, char *path, char *dst);
|
|
|
|
#define Vector2Copy(a,b) {b[0]=a[0];b[1]=a[1];}
|
|
|
|
//==================================================================
|
|
// Profiling
|
|
|
|
inline DWORD sCycles()
{
	// Reads the low 32 bits of the CPU time stamp counter via RDTSC
	// (emitted as raw bytes 0F 31 for old assemblers). Used for cheap
	// profiling; the 32-bit value wraps after a few seconds on GHz CPUs,
	// as the comment below notes.
	uint L;
#ifndef PS2
	__asm
	{
		xor   eax,eax	          // Required so that VC++ realizes EAX is modified.
		_emit 0x0F		// RDTSC  -  Pentium+ time stamp register to EDX:EAX.
		_emit 0x31		// Use only 32 bits in EAX - even a Ghz cpu would have a 4+ sec period.
		mov   [L],eax             // Save low value.
		xor   edx,edx	          // Required so that VC++ realizes EDX is modified.
	}
#else
	L = 0;  // no TSC equivalent wired up on this platform
#endif
	return L;
}
|
|
|
|
inline double sCycles2()
{
	// Reads the full 64-bit CPU time stamp counter via RDTSC and returns
	// it as a double: low + 2^32 * high. Unlike sCycles(), this does not
	// wrap, at the cost of a float conversion.
	uint L,H;
#ifndef PS2
	__asm
	{
		xor   eax,eax	          // Required so that VC++ realizes EAX is modified.
		xor   edx,edx	          // Required so that VC++ realizes EDX is modified.
		_emit 0x0F		// RDTSC  -  Pentium+ time stamp register to EDX:EAX.
		_emit 0x31		// Use only 32 bits in EAX - even a Ghz cpu would have a 4+ sec period.
		mov   [L],eax             // Save low value.
		mov   [H],edx             // Save high value.
	}
#else
	L = H = 0;  // no TSC equivalent wired up on this platform
#endif
	// combine the two halves: high word weighted by 2^32
	return ((DOUBLE)L + 4294967296.0 * (DOUBLE)H);
}
|
|
|
|
#define FP_BITS(fp) (*(DWORD *)&(fp))
|
|
|
|
_inline float C_sqrt_tab(float n)
{
	// Fast approximate square root: replaces the mantissa from a
	// precomputed 64K lookup table (indexed by bits 8..23 of the float)
	// and halves the biased exponent arithmetically.
	// NOTE(review): FP_BITS type-puns through a pointer cast, which
	// violates strict aliasing; kept because the whole file relies on it
	// and the era's compilers accept it.
	if (FP_BITS(n) == 0)
		return 0.0f;    // square root of 0 (FIX: was 0.0, a double literal in a float function)

	FP_BITS(n) = gRenDev->fast_sqrt_table[(FP_BITS(n) >> 8) & 0xFFFF] | ((((FP_BITS(n) - 0x3F800000) >> 1) + 0x3F800000) & 0x7F800000);

	return n;
}
|
|
|
|
//=========================================================================================
|
|
|
|
//
|
|
// Memory copy.
|
|
//
|
|
#if DO_ASM
|
|
#define DEFINED_cryMemcpy
|
|
/******************************************************************************
|
|
|
|
Copyright (c) 2001 Advanced Micro Devices, Inc.
|
|
|
|
LIMITATION OF LIABILITY: THE MATERIALS ARE PROVIDED *AS IS* WITHOUT ANY
|
|
EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING WARRANTIES OF MERCHANTABILITY,
|
|
NONINFRINGEMENT OF THIRD-PARTY INTELLECTUAL PROPERTY, OR FITNESS FOR ANY
|
|
PARTICULAR PURPOSE. IN NO EVENT SHALL AMD OR ITS SUPPLIERS BE LIABLE FOR ANY
|
|
DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS,
|
|
BUSINESS INTERRUPTION, LOSS OF INFORMATION) ARISING OUT OF THE USE OF OR
|
|
INABILITY TO USE THE MATERIALS, EVEN IF AMD HAS BEEN ADVISED OF THE POSSIBILITY
|
|
OF SUCH DAMAGES. BECAUSE SOME JURISDICTIONS PROHIBIT THE EXCLUSION OR LIMITATION
|
|
OF LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE LIMITATION MAY
|
|
NOT APPLY TO YOU.
|
|
|
|
AMD does not assume any responsibility for any errors which may appear in the
|
|
Materials nor any responsibility to support or update the Materials. AMD retains
|
|
the right to make changes to its test specifications at any time, without notice.
|
|
|
|
NO SUPPORT OBLIGATION: AMD is not obligated to furnish, support, or make any
|
|
further information, software, technical information, know-how, or show-how
|
|
available to you.
|
|
|
|
So that all may benefit from your experience, please report any problems
|
|
or suggestions about this software to 3dsdk.support@amd.com
|
|
|
|
AMD Developer Technologies, M/S 585
|
|
Advanced Micro Devices, Inc.
|
|
5900 E. Ben White Blvd.
|
|
Austin, TX 78741
|
|
3dsdk.support@amd.com
|
|
******************************************************************************/
|
|
|
|
/*****************************************************************************
|
|
MEMCPY_AMD.CPP
|
|
******************************************************************************/
|
|
|
|
// Very optimized memcpy() routine for AMD Athlon and Duron family.
|
|
// This code uses any of FOUR different basic copy methods, depending
|
|
// on the transfer size.
|
|
// NOTE: Since this code uses MOVNTQ (also known as "Non-Temporal MOV" or
|
|
// "Streaming Store"), and also uses the software prefetch instructions,
|
|
// be sure you're running on Athlon/Duron or other recent CPU before calling!
|
|
|
|
#define TINY_BLOCK_COPY 64 // upper limit for movsd type copy
|
|
// The smallest copy uses the X86 "movsd" instruction, in an optimized
|
|
// form which is an "unrolled loop".
|
|
|
|
#define IN_CACHE_COPY 64 * 1024 // upper limit for movq/movq copy w/SW prefetch
|
|
// Next is a copy that uses the MMX registers to copy 8 bytes at a time,
|
|
// also using the "unrolled loop" optimization. This code uses
|
|
// the software prefetch instruction to get the data into the cache.
|
|
|
|
#define UNCACHED_COPY 197 * 1024 // upper limit for movq/movntq w/SW prefetch
|
|
// For larger blocks, which will spill beyond the cache, it's faster to
|
|
// use the Streaming Store instruction MOVNTQ. This write instruction
|
|
// bypasses the cache and writes straight to main memory. This code also
|
|
// uses the software prefetch instruction to pre-read the data.
|
|
// USE 64 * 1024 FOR THIS VALUE IF YOU'RE ALWAYS FILLING A "CLEAN CACHE"
|
|
|
|
#define BLOCK_PREFETCH_COPY infinity // no limit for movq/movntq w/block prefetch
|
|
#define CACHEBLOCK 80h // number of 64-byte blocks (cache lines) for block prefetch
|
|
// For the largest size blocks, a special technique called Block Prefetch
|
|
// can be used to accelerate the read operations. Block Prefetch reads
|
|
// one address per cache line, for a series of cache lines, in a short loop.
|
|
// This is faster than using software prefetch. The technique is great for
|
|
// getting maximum read bandwidth, especially in DDR memory systems.
|
|
|
|
// Inline assembly syntax for use with Visual C++
|
|
inline void cryMemcpy( void* Dst, const void* Src, INT Count )
{
	// Optimized memcpy (AMD Athlon/Duron sample code, license above).
	// Picks one of four strategies by size: unrolled movsb/movsd for tiny
	// blocks, MMX movq with software prefetch for in-cache blocks,
	// streaming movntq for larger blocks, and block-prefetch + movntq for
	// the largest. Falls back to a plain rep movsd/movsb copy when the
	// CPU reports no MMX support.
	if( gRenDev->m_Cpu->mCpu[0].mFeatures & CFI_MMX )
	{
		__asm
		{
			mov		ecx, [Count]	; number of bytes to copy
			mov		edi, [Dst]		; destination
			mov		esi, [Src]		; source
			mov		ebx, ecx		; keep a copy of count

			cld
			cmp		ecx, TINY_BLOCK_COPY
			jb		$memcpy_ic_3	; tiny? skip mmx copy

			cmp		ecx, 32*1024		; don't align between 32k-64k because
			jbe		$memcpy_do_align	;  it appears to be slower
			cmp		ecx, 64*1024
			jbe		$memcpy_align_done
		$memcpy_do_align:
			mov		ecx, 8			; a trick that's faster than rep movsb...
			sub		ecx, edi		; align destination to qword
			and		ecx, 111b		; get the low bits
			sub		ebx, ecx		; update copy count
			neg		ecx				; set up to jump into the array
			add		ecx, offset $memcpy_align_done
			jmp		ecx				; jump to array of movsb's

		align 4
			movsb
			movsb
			movsb
			movsb
			movsb
			movsb
			movsb
			movsb

		$memcpy_align_done:			; destination is dword aligned
			mov		ecx, ebx		; number of bytes left to copy
			shr		ecx, 6			; get 64-byte block count
			jz		$memcpy_ic_2	; finish the last few bytes

			cmp		ecx, IN_CACHE_COPY/64	; too big 4 cache? use uncached copy
			jae		$memcpy_uc_test

		// This is small block copy that uses the MMX registers to copy 8 bytes
		// at a time.  It uses the "unrolled loop" optimization, and also uses
		// the software prefetch instruction to get the data into the cache.
		align 16
		$memcpy_ic_1:				; 64-byte block copies, in-cache copy

			prefetchnta [esi + (200*64/34+192)]		; start reading ahead

			movq	mm0, [esi+0]	; read 64 bits
			movq	mm1, [esi+8]
			movq	[edi+0], mm0	; write 64 bits
			movq	[edi+8], mm1	;    note:  the normal movq writes the
			movq	mm2, [esi+16]	;    data to cache; a cache line will be
			movq	mm3, [esi+24]	;    allocated as needed, to store the data
			movq	[edi+16], mm2
			movq	[edi+24], mm3
			movq	mm0, [esi+32]
			movq	mm1, [esi+40]
			movq	[edi+32], mm0
			movq	[edi+40], mm1
			movq	mm2, [esi+48]
			movq	mm3, [esi+56]
			movq	[edi+48], mm2
			movq	[edi+56], mm3

			add		esi, 64			; update source pointer
			add		edi, 64			; update destination pointer
			dec		ecx				; count down
			jnz		$memcpy_ic_1	; last 64-byte block?

		$memcpy_ic_2:
			mov		ecx, ebx		; has valid low 6 bits of the byte count
		$memcpy_ic_3:
			shr		ecx, 2			; dword count
			and		ecx, 1111b		; only look at the "remainder" bits
			neg		ecx				; set up to jump into the array
			add		ecx, offset $memcpy_last_few
			jmp		ecx				; jump to array of movsd's

		$memcpy_uc_test:
			cmp		ecx, UNCACHED_COPY/64	; big enough? use block prefetch copy
			jae		$memcpy_bp_1

		$memcpy_64_test:
			or		ecx, ecx		; tail end of block prefetch will jump here
			jz		$memcpy_ic_2	; no more 64-byte blocks left

		// For larger blocks, which will spill beyond the cache, it's faster to
		// use the Streaming Store instruction MOVNTQ.   This write instruction
		// bypasses the cache and writes straight to main memory.  This code also
		// uses the software prefetch instruction to pre-read the data.
		align 16
		$memcpy_uc_1:				; 64-byte blocks, uncached copy

			prefetchnta [esi + (200*64/34+192)]		; start reading ahead

			movq	mm0,[esi+0]		; read 64 bits
			add		edi,64			; update destination pointer
			movq	mm1,[esi+8]
			add		esi,64			; update source pointer
			movq	mm2,[esi-48]
			movntq	[edi-64], mm0	; write 64 bits, bypassing the cache
			movq	mm0,[esi-40]	;    note: movntq also prevents the CPU
			movntq	[edi-56], mm1	;    from READING the destination address
			movq	mm1,[esi-32]	;    into the cache, only to be over-written
			movntq	[edi-48], mm2	;    so that also helps performance
			movq	mm2,[esi-24]
			movntq	[edi-40], mm0
			movq	mm0,[esi-16]
			movntq	[edi-32], mm1
			movq	mm1,[esi-8]
			movntq	[edi-24], mm2
			movntq	[edi-16], mm0
			dec		ecx
			movntq	[edi-8], mm1
			jnz		$memcpy_uc_1	; last 64-byte block?

			jmp		$memcpy_ic_2	; almost done

		// For the largest size blocks, a special technique called Block Prefetch
		// can be used to accelerate the read operations.   Block Prefetch reads
		// one address per cache line, for a series of cache lines, in a short loop.
		// This is faster than using software prefetch.  The technique is great for
		// getting maximum read bandwidth, especially in DDR memory systems.
		$memcpy_bp_1:				; large blocks, block prefetch copy

			cmp		ecx, CACHEBLOCK			; big enough to run another prefetch loop?
			jl		$memcpy_64_test			; no, back to regular uncached copy

			mov		eax, CACHEBLOCK / 2		; block prefetch loop, unrolled 2X
			add		esi, CACHEBLOCK * 64	; move to the top of the block
		align 16
		$memcpy_bp_2:
			mov		edx, [esi-64]	; grab one address per cache line
			mov		edx, [esi-128]	; grab one address per cache line
			sub		esi, 128		; go reverse order to suppress HW prefetcher
			dec		eax				; count down the cache lines
			jnz		$memcpy_bp_2	; keep grabbing more lines into cache

			mov		eax, CACHEBLOCK	; now that it's in cache, do the copy
		align 16
		$memcpy_bp_3:
			movq	mm0, [esi   ]	; read 64 bits
			movq	mm1, [esi+ 8]
			movq	mm2, [esi+16]
			movq	mm3, [esi+24]
			movq	mm4, [esi+32]
			movq	mm5, [esi+40]
			movq	mm6, [esi+48]
			movq	mm7, [esi+56]
			add		esi, 64			; update source pointer
			movntq	[edi   ], mm0	; write 64 bits, bypassing cache
			movntq	[edi+ 8], mm1	;    note: movntq also prevents the CPU
			movntq	[edi+16], mm2	;    from READING the destination address
			movntq	[edi+24], mm3	;    into the cache, only to be over-written,
			movntq	[edi+32], mm4	;    so that also helps performance
			movntq	[edi+40], mm5
			movntq	[edi+48], mm6
			movntq	[edi+56], mm7
			add		edi, 64			; update dest pointer

			dec		eax				; count down

			jnz		$memcpy_bp_3	; keep copying
			sub		ecx, CACHEBLOCK	; update the 64-byte block count
			jmp		$memcpy_bp_1	; keep processing chunks

		// The smallest copy uses the X86 "movsd" instruction, in an optimized
		// form which is an "unrolled loop".   Then it handles the last few bytes.
		align 4
			movsd
			movsd				; perform last 1-15 dword copies
			movsd
			movsd
			movsd
			movsd
			movsd
			movsd
			movsd
			movsd				; perform last 1-7 dword copies
			movsd
			movsd
			movsd
			movsd
			movsd
			movsd

		$memcpy_last_few:		; dword aligned from before movsd's
			mov		ecx, ebx	; has valid low 2 bits of the byte count
			and		ecx, 11b	; the last few cows must come home
			jz		$memcpy_final	; no more, let's leave
			rep		movsb		; the last 1, 2, or 3 bytes

		$memcpy_final:
			emms				; clean up the MMX state
			sfence				; flush the write buffer
			//	mov		eax, [dest]	; ret value = destination pointer
		}
	}
	else
	{
		// No MMX available: plain dword copy plus a byte-sized tail.
		__asm
		{
			mov ecx, Count
			mov esi, Src
			mov edi, Dst
			mov ebx, ecx
			shr ecx, 2
			and ebx, 3
			rep movsd
			mov ecx, ebx
			rep movsb
		}
	}
}
|
|
#else
|
|
inline void cryMemcpy( void* Dst, const void* Src, INT Count )
{
	// Portable fallback when the hand-written asm path (DO_ASM) is
	// compiled out: defer to the CRT memcpy.
	memcpy(Dst, Src, Count);
}
|
|
#endif
|
|
|
|
//=========================================================================================
|
|
|
|
//
|
|
// Normal timing.
|
|
//
|
|
#define ticks(Timer) {Timer -= sCycles2();}
|
|
#define unticks(Timer) {Timer += sCycles2()+34;}
|
|
|
|
|
|
//=============================================================================
|
|
|
|
|
|
/*-----------------------------------------------------------------------------
|
|
The End.
|
|
-----------------------------------------------------------------------------*/
|