212 lines
8.1 KiB
C++
212 lines
8.1 KiB
C++
#ifndef _CRY_ANIMATION_SSE_UTILS_HDR_
|
||
#define _CRY_ANIMATION_SSE_UTILS_HDR_
|
||
|
||
#include "MathUtils.h"
|
||
|
||
#ifdef _CPU_X86
|
||
// given the array of matrices, calculates the min/max
|
||
// of their positions, and puts them into the min and max Vec3d
|
||
// NOTE: the matrix array must be aligned on 16-byte boundary
|
||
extern void getBBoxSSE (const Matrix44* pBones, unsigned numBones, CryAABB* pBBox);
|
||
extern void getBBoxSSE (const Matrix44* pBones, const CryBBoxA16* pBoneBBox, unsigned numBones, CryAABB* pBBox);
|
||
#endif
|
||
|
||
// packs the array of Vec3dA16 into Vec3d's
|
||
extern void packVec3d16 (void* pData, unsigned nCount);
|
||
|
||
namespace cpu
|
||
{
|
||
// detects CPU features (SSE) and perhaps sets up some
|
||
// pointers to functions
|
||
void detect ();
|
||
|
||
// the CPU feature set, as returned by CPUID(1):EDX
|
||
extern DWORD g_dwFeatures;
|
||
|
||
// the extended feature set, as returned by CPUID(0x80000001):EDX
|
||
extern DWORD g_dwFeaturesEx;
|
||
|
||
// Bit masks for the CPU feature words.
// Every enumerator except g_featureEx3DNow follows the bit layout of
// CPUID(1):EDX (the word kept in g_dwFeatures); g_featureEx3DNow is a bit of
// the extended word CPUID(0x80000001):EDX (kept in g_dwFeaturesEx).
//
// All masks are built from unsigned shifts: "1 << 31" on a signed int shifts
// into the sign bit, which produces a negative enumerator (and is undefined
// in C and in older C++ standards). With unsigned shifts every mask,
// including bit 31, is a positive value.
enum FeaturesEnum
{
	// Floating Point Unit On-Chip. The processor contains an x87 FPU.
	g_featureFPU = 1u << 0,

	// Virtual 8086 Mode Enhancements. Virtual 8086 mode enhancements, including
	// CR4.VME for controlling the feature, CR4.PVI for protected mode virtual
	// interrupts, software interrupt indirection, expansion of the TSS with the software
	// indirection bitmap, and EFLAGS.VIF and EFLAGS.VIP flags.
	g_featureVME = 1u << 1,

	// Debugging Extensions. Support for I/O breakpoints, including CR4.DE for
	// controlling the feature, and optional trapping of accesses to DR4 and DR5.
	g_featureDE = 1u << 2,
	// Long-form alias of g_featureDE.
	g_featureDebuggingExtensions = g_featureDE,

	// Page Size Extension. Large pages of size 4Mbyte are supported, including
	// CR4.PSE for controlling the feature, the defined dirty bit in PDE (Page Directory
	// Entries), optional reserved bit trapping in CR3, PDEs, and PTEs.
	g_featurePSE = 1u << 3,

	// Time Stamp Counter. The RDTSC instruction is supported, including CR4.TSD
	// for controlling privilege.
	g_featureTSC = 1u << 4,

	// Model Specific Registers RDMSR and WRMSR Instructions. The RDMSR and
	// WRMSR instructions are supported. Some of the MSRs are implementation
	// dependent.
	g_featureMSR = 1u << 5,

	// PAE Physical Address Extension. Physical addresses greater than 32 bits are
	// supported: extended page table entry formats, an extra level in the page
	// translation tables is defined, 2 Mbyte pages are supported instead of 4 Mbyte
	// pages if PAE bit is 1. The actual number of address bits beyond 32 is not defined,
	// and is implementation specific.
	g_featurePAE = 1u << 6,

	// MCE Machine Check Exception. Exception 18 is defined for Machine Checks,
	// including CR4.MCE for controlling the feature. This feature does not define the
	// model-specific implementations of machine-check error logging, reporting, and
	// processor shutdowns. Machine Check exception handlers may have to depend on
	// processor version to do model specific processing of the exception, or test for the
	// presence of the Machine Check feature.
	g_featureMCE = 1u << 7,

	// CMPXCHG8B Instruction. The compare-and-exchange 8 bytes (64 bits)
	// instruction is supported (implicitly locked and atomic).
	g_featureCX8 = 1u << 8,

	// APIC On-Chip. The processor contains an Advanced Programmable Interrupt
	// Controller (APIC), responding to memory mapped commands in the physical
	// address range FFFE0000H to FFFE0FFFH (by default - some processors permit
	// the APIC to be relocated).
	g_featureAPIC = 1u << 9,

	// NOTE: bit 10 has no enumerator here (it is a reserved bit of CPUID(1):EDX).

	// SYSENTER and SYSEXIT Instructions. The SYSENTER and SYSEXIT and
	// associated MSRs are supported.
	g_featureSEP = 1u << 11,

	// Memory Type Range Registers. MTRRs are supported. The MTRRcap MSR
	// contains feature bits that describe what memory types are supported, how many
	// variable MTRRs are supported, and whether fixed MTRRs are supported.
	g_featureMTRR = 1u << 12,

	// PTE Global Bit. The global bit in page directory entries (PDEs) and page table
	// entries (PTEs) is supported, indicating TLB entries that are common to different
	// processes and need not be flushed. The CR4.PGE bit controls this feature.
	g_featurePGE = 1u << 13,

	// Machine Check Architecture. The Machine Check Architecture, which provides
	// a compatible mechanism for error reporting in P6 family, Pentium 4, and Intel
	// Xeon processors, and future processors, is supported. The MCG_CAP MSR
	// contains feature bits describing how many banks of error reporting MSRs are
	// supported.
	g_featureMCA = 1u << 14,

	// Conditional Move Instructions. The conditional move instruction CMOV is
	// supported. In addition, if x87 FPU is present as indicated by the CPUID.FPU
	// feature bit, then the FCOMI and FCMOV instructions are supported.
	g_featureCMOV = 1u << 15,

	// Page Attribute Table. Page Attribute Table is supported. This feature augments
	// the Memory Type Range Registers (MTRRs), allowing an operating system to
	// specify attributes of memory on a 4K granularity through a linear address.
	g_featurePAT = 1u << 16,

	// 32-Bit Page Size Extension. Extended 4-MByte pages that are capable of
	// addressing physical memory beyond 4 GBytes are supported. This feature
	// indicates that the upper four bits of the physical address of the 4-MByte page is
	// encoded by bits 13-16 of the page directory entry.
	g_featurePSE36 = 1u << 17,

	// PSN Processor Serial Number. The processor supports the 96-bit processor
	// identification number feature and the feature is enabled.
	g_featurePSN = 1u << 18,

	// CLFLUSH Instruction. CLFLUSH Instruction is supported.
	g_featureCLFSH = 1u << 19,

	// Reserved
	g_featureReserved = 1u << 20,

	// DS Debug Store. The processor supports the ability to write debug information into a
	// memory resident buffer. This feature is used by the branch trace store (BTS) and
	// precise event-based sampling (PEBS) facilities (see Chapter 15, Debugging and
	// Performance Monitoring, in the IA-32 Intel Architecture Software Developer's
	// Manual, Volume 3).
	g_featureDS = 1u << 21,

	// Thermal Monitor and Software Controlled Clock Facilities. The processor
	// implements internal MSRs that allow processor temperature to be monitored and
	// processor performance to be modulated in predefined duty cycles under software
	// control.
	g_featureACPI = 1u << 22,

	// Intel MMX Technology. The processor supports the Intel MMX technology.
	g_featureMMX = 1u << 23,

	// FXSAVE and FXRSTOR Instructions. The FXSAVE and FXRSTOR instructions
	// are supported for fast save and restore of the floating point context. Presence of
	// this bit also indicates that CR4.OSFXSR is available for an operating system to
	// indicate that it supports the FXSAVE and FXRSTOR instructions.
	g_featureFXSR = 1u << 24,

	// SSE. The processor supports the SSE extensions.
	g_featureSSE = 1u << 25,

	// SSE2. The processor supports the SSE2 extensions.
	g_featureSSE2 = 1u << 26,

	// Self Snoop. The processor supports the management of conflicting memory
	// types by performing a snoop of its own cache structure for transactions issued to
	// the bus.
	g_featureSS = 1u << 27,

	// Hyper-Threading Technology. The processor implements Hyper-Threading
	// Technology.
	g_featureHTT = 1u << 28,

	// TM Thermal Monitor. The processor implements the thermal monitor automatic
	// thermal control circuitry (TCC).
	g_featureTM = 1u << 29,

	// Reserved
	g_featureReserved30 = 1u << 30,

	// PBE Pending Break Enable. The processor supports the use of the FERR#/PBE# pin
	// when the processor is in the stop-clock state (STPCLK# is asserted) to signal the
	// processor that an interrupt is pending and that the processor should return to
	// normal operation to handle the interrupt. Bit 10 (PBE enable) in the
	// IA32_MISC_ENABLE MSR enables this capability.
	// Must be an unsigned shift: bit 31 is the sign bit of a 32-bit int.
	g_featurePBE = 1u << 31,

	// 3DNow! instruction set bit. Unlike the masks above, this one applies to the
	// extended feature word (g_dwFeaturesEx), so sharing the numeric value of
	// g_featurePBE is intentional and harmless.
	g_featureEx3DNow = 0x80000000u
};
|
||
|
||
inline int hasSSE()
|
||
{
|
||
#ifdef _AMD64_
|
||
return g_featureSSE;
|
||
#else
|
||
return (g_dwFeatures & g_featureSSE);
|
||
#endif
|
||
}
|
||
|
||
// does the machine have the RDTSC instruction?
|
||
inline int hasRDTSC()
|
||
{
|
||
return (g_dwFeatures & g_featureTSC);
|
||
}
|
||
|
||
// does the machine have 3D Now instruction set support?
|
||
inline int has3DNow()
|
||
{
|
||
return (g_dwFeaturesEx & g_featureEx3DNow);
|
||
}
|
||
|
||
extern void logCaps();
|
||
}
|
||
|
||
|
||
#endif |