00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef DOXYGEN
00013
00014 #include <xmmintrin.h>
00015 #include <emmintrin.h>
00016
// MSVC inlining hints: enable inline expansion of recursive calls and set the
// inline depth to 255.
#pragma inline_recursion(on)
#pragma inline_depth(255)


// TA_MM_SHUFFLE(x, y)
//   Permutes the four 32-bit lanes of the __m128 value x according to the
//   immediate y (normally built with _MM_SHUFFLE) and yields a __m128.
//
//   With TA_SSE2 defined the SSE2 integer shuffle is used, which needs only one
//   source operand; otherwise the SSE float shuffle is used with x supplied as
//   both sources. Both forms select lanes identically.
//
//   The SSE2 form converts between __m128 and __m128i with the cast intrinsics
//   rather than by reference-casting the intrinsic's return value: casting a
//   temporary to a non-const reference is only accepted as a compiler
//   extension, and it also forced x to be an lvalue.
//
//   Note: in the non-SSE2 form x is evaluated twice, so it must not have side
//   effects.
#ifdef TA_SSE2
#define TA_MM_SHUFFLE(x, y) (_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x), (y))))
#else
#define TA_MM_SHUFFLE(x, y) (_mm_shuffle_ps((x), (x), (y)))
#endif
00026
00027
00028 namespace TA
00029 {
00030
00031 namespace Meta
00032 {
// Raw storage shared by the vector types in this file.
//
// The anonymous union overlays four consecutive floats (x, y, z, w) with a
// single __m128, so the same 16 bytes can be read per component or handed to
// SSE intrinsics as one value through xyzw.
struct _Vec3
{
    union
    {
        // Per-component view; x occupies the lowest lane of xyzw.
        struct
        {
            float x, y, z, w;
        };
        // Whole-register view of the same four floats.
        __m128 xyzw;
    };
};
00047
00048 template <class Type>
00049 struct ScalerVec
00050 {
00051 __forceinline _Vec3 EvaluateVec3() const { return ((Type&)*this).EvaluateVec3() ; }
00052 __forceinline float EvaluateFloat() const { return ((Type&)*this).EvaluateFloat(); }
00053 };
00054
00055 __forceinline __m128 VecDotFunc(__m128 a, __m128 b)
00056 {
00057 __m128 mm1 = _mm_mul_ps(a, b);
00058 __m128 mm2 = TA_MM_SHUFFLE(mm1, _MM_SHUFFLE(3, 0, 2, 1));
00059 mm2 = _mm_add_ps(mm2, mm1);
00060 mm1 = TA_MM_SHUFFLE(mm1, _MM_SHUFFLE(3, 1, 0, 2));
00061 mm1 = _mm_add_ps(mm1, mm2);
00062 return mm1;
00063 }
00064
00065 struct VecDot : public ScalerVec<VecDot>
00066 {
00067 const _Vec3& m_a;
00068 const _Vec3& m_b;
00069 VecDot(const _Vec3& a, const _Vec3& b) : m_a(a), m_b(b) {}
00070 __forceinline _Vec3 EvaluateVec3() const
00071 {
00072 return (_Vec3&)VecDotFunc((__m128&)m_a, (__m128&)m_b);
00073 }
00074 __forceinline float EvaluateFloat() const
00075 {
00076 #ifdef TA_SSE2
00077 __m128 mm1 = _mm_mul_ps((__m128&)m_a, (__m128&)m_b);
00078 __m128 mm2 = (__m128&)_mm_shuffle_epi32((__m128i&)mm1, _MM_SHUFFLE(0, 0, 0, 1));
00079 __m128 mm3 = (__m128&)_mm_shuffle_epi32((__m128i&)mm1, _MM_SHUFFLE(0, 0, 0, 2));
00080 mm2 = _mm_add_ss(mm2, mm1);
00081 mm2 = _mm_add_ss(mm2, mm3);
00082 return ((_Vec3&)mm2).x;
00083 #else
00084 _Vec3 dst;
00085 (__m128&)dst = _mm_mul_ps((__m128&)m_a, (__m128&)m_b);
00086 return dst.x + dst.y + dst.z;
00087 #endif
00088 }
00089 __forceinline operator float() const { return EvaluateFloat(); }
00090 };
00091
00092 struct VecMagnitudeSqrd : public ScalerVec<VecMagnitudeSqrd>
00093 {
00094 const _Vec3& m_a;
00095 VecMagnitudeSqrd(const _Vec3& a) : m_a(a) {}
00096 __forceinline _Vec3 EvaluateVec3() const
00097 {
00098 __m128 mm1 = (__m128&)m_a;
00099 mm1 = VecDotFunc((__m128&)mm1, (__m128&)mm1);
00100 return (_Vec3&)mm1;
00101 }
00102 __forceinline float EvaluateFloat() const
00103 {
00104 _Vec3 dst = m_a;
00105
00106
00107
00108 #ifdef TA_SSE2
00109 __m128 mm1 = _mm_mul_ps((__m128&)dst, (__m128&)dst);
00110 __m128 mm2 = (__m128&)_mm_shuffle_epi32((__m128i&)mm1, _MM_SHUFFLE(0, 0, 0, 1));
00111 __m128 mm3 = (__m128&)_mm_shuffle_epi32((__m128i&)mm1, _MM_SHUFFLE(0, 0, 0, 2));
00112 mm2 = _mm_add_ss(mm2, mm1);
00113 mm2 = _mm_add_ss(mm2, mm3);
00114 return ((_Vec3&)mm2).x;
00115 #else
00116 (__m128&)dst = _mm_mul_ps((__m128&)dst, (__m128&)dst);
00117 return dst.x + dst.y + dst.z;
00118 #endif
00119 }
00120 __forceinline operator float() const { return EvaluateFloat(); }
00121 };
00122
00123 struct VecMagnitude : public ScalerVec<VecMagnitude>
00124 {
00125 const _Vec3& m_a;
00126 VecMagnitude(const _Vec3& a) : m_a(a) {}
00127 __forceinline _Vec3 EvaluateVec3() const
00128 {
00129 __m128 mm1 = VecDotFunc((__m128&)m_a, (__m128&)m_a);
00130 return (_Vec3&)_mm_sqrt_ps(mm1);
00131 }
00132 __forceinline float EvaluateFloat() const
00133 {
00134 _Vec3 dst;
00135 (__m128&)dst = _mm_mul_ps((__m128&)m_a, (__m128&)m_a);
00136 return sqrtf(dst.x + dst.y + dst.z);
00137 }
00138 __forceinline operator float() const { return EvaluateFloat(); }
00139 };
00140 }
00141
00142
00143
00144 TA_ALIGN_16 struct TACOMMON_CLASS Vec3 : public Meta::_Vec3
00145 {
00146 enum Axis
00147 {
00148 AXIS_X = 0,
00149 AXIS_Y,
00150 AXIS_Z,
00151 };
00152
00153
00154
00155 __forceinline Vec3() { (__m128&)*this = _mm_setzero_ps(); }
00156 __forceinline Vec3(int nNothing) {}
00157 __forceinline Vec3(const Vec3& v3Value) { xyzw = v3Value.xyzw; }
00158 __forceinline Vec3(float fX, float fY, float fZ) { xyzw = _mm_setr_ps(fX, fY, fZ, 0.0f); }
00159 __forceinline void Initialise(float fX, float fY, float fZ) { xyzw = _mm_setr_ps(fX, fY, fZ, 0.0f); }
00160
00161
00162
00163 __forceinline operator float* () { return (float*)&x; }
00164 __forceinline operator const float* () const { return (float*)&x; }
00165
00166
00167 __forceinline float& operator [] (int nIndex) { return ((float*)&x)[nIndex]; }
00168 __forceinline const float& operator [] (int nIndex) const { return ((float*)&x)[nIndex]; }
00169
00170
00171 __forceinline Vec3& operator += (const Vec3& v3Value) { (__m128&)(*this) = _mm_add_ps((__m128&)(*this), (__m128&)v3Value); return *this; }
00172 __forceinline Vec3& operator -= (const Vec3& v3Value) { (__m128&)(*this) = _mm_sub_ps((__m128&)(*this), (__m128&)v3Value); return *this; }
00173
00174 template <class Type> __forceinline const Vec3& operator = (const Meta::ScalerVec<Type>& src) {*this = src.Evaluate(); return *this; }
00175 __forceinline const Vec3& operator = (const Vec3& src) { xyzw = src.xyzw; return *this; }
00176
00178
00179 __forceinline Vec3 operator + () const { return *this; };
00180 __forceinline Vec3 operator - () const { return (Vec3&)_mm_sub_ps(_mm_setzero_ps(), xyzw); };
00182
00184
00185 __forceinline bool operator == (const Vec3& v3Value) const { return x == v3Value.x && y == v3Value.y && z == v3Value.z; }
00186 __forceinline bool operator != (const Vec3& v3Value) const { return x != v3Value.x || y != v3Value.y || z != v3Value.z; }
00188
00189
00190 __forceinline Vec3 Cross(const Vec3& v3Value) const { return Cross(*this, v3Value); }
00191 static __forceinline Vec3 Cross(const Vec3& v3A, const Vec3& v3B)
00192 {
00193 __m128 mm1 = *(__m128*)&v3A;
00194 __m128 mm2 = *(__m128*)&v3B;
00195 __m128 mm1b = TA_MM_SHUFFLE(mm1, _MM_SHUFFLE(3, 0, 2, 1));
00196 __m128 mm2b = TA_MM_SHUFFLE(mm2, _MM_SHUFFLE(3, 1, 0, 2));
00197
00198 mm2b = _mm_mul_ps(mm2b, mm1b);
00199 __m128 mm1c = TA_MM_SHUFFLE(mm1b, _MM_SHUFFLE(3, 0, 2, 1));
00200 __m128 mm2c = TA_MM_SHUFFLE(mm2, _MM_SHUFFLE(3, 0, 2, 1));
00201 mm1c = _mm_mul_ps(mm1c, mm2c);
00202 mm2b = _mm_sub_ps(mm2b, mm1c);
00203 return (Vec3&)mm2b;
00204 }
00205
00206 static __forceinline Vec3 TA_VEC3_CALL CrossWithUnitX(const Vec3& v3Value) { return Vec3(0.0f, v3Value.z, -v3Value.y); }
00207 __forceinline Vec3 CrossWithUnitX() const { return CrossWithUnitX(*this); }
00208 static __forceinline Vec3 TA_VEC3_CALL CrossWithUnitY(const Vec3& v3Value) { return Vec3(-v3Value.z, 0.0f, v3Value.x); }
00209 __forceinline Vec3 CrossWithUnitY() const { return CrossWithUnitY(*this); }
00210 static __forceinline Vec3 TA_VEC3_CALL CrossWithUnitZ(const Vec3& v3Value) { return Vec3(v3Value.y, -v3Value.x, 0.0f); }
00211 __forceinline Vec3 CrossWithUnitZ() const { return CrossWithUnitZ(*this); }
00212 static __forceinline float TA_VEC3_CALL CrossX(const Vec3& v3A, const Vec3& v3B) { return v3A.y * v3B.z - v3A.z * v3B.y; }
00213 __forceinline float CrossX(const Vec3& v3Value) const { return CrossX(*this, v3Value); }
00214 static __forceinline float TA_VEC3_CALL CrossY(const Vec3& v3A, const Vec3& v3B) { return v3A.z * v3B.x - v3A.x * v3B.z; }
00215 __forceinline float CrossY(const Vec3& v3Value) const { return CrossY(*this, v3Value); }
00216 static __forceinline float TA_VEC3_CALL CrossZ(const Vec3& v3A, const Vec3& v3B) { return v3A.x * v3B.y - v3A.y * v3B.x; }
00217 __forceinline float CrossZ(const Vec3& v3Value) const { return CrossZ(*this, v3Value); }
00218
00219
00220
00221 static __forceinline Meta::VecDot TA_VEC3_CALL Dot(const Vec3& v3A, const Vec3& v3B) { return Meta::VecDot(v3A, v3B); }
00222 __forceinline Meta::VecDot Dot(const Vec3& v3Value) const { return Meta::VecDot(*this, v3Value); }
00223
00224
00225 static __forceinline Vec3 TA_VEC3_CALL Min(const Vec3& v3A, const Vec3& v3B) { return (Vec3&)_mm_min_ps((__m128&)v3A, (__m128&)v3B); }
00226 static __forceinline Vec3 TA_VEC3_CALL Max(const Vec3& v3A, const Vec3& v3B) { return (Vec3&)_mm_max_ps((__m128&)v3A, (__m128&)v3B); }
00227
00228
00229 static __forceinline Meta::VecMagnitude TA_VEC3_CALL GetMagnitude(const Vec3& v3Value) { return Meta::VecMagnitude(v3Value); }
00230 __forceinline Meta::VecMagnitude GetMagnitude() const { return Meta::VecMagnitude(*this); }
00231
00232
00233 static __forceinline Meta::VecMagnitudeSqrd TA_VEC3_CALL GetMagnitudeSqrd(const Vec3& v3Value) { return Meta::VecMagnitudeSqrd(v3Value); }
00234 __forceinline Meta::VecMagnitudeSqrd GetMagnitudeSqrd() const { return Meta::VecMagnitudeSqrd(*this); }
00235
00236
00237 static __forceinline Vec3 TA_VEC3_CALL GetNormal(const Vec3& v3Value) { return v3Value.GetNormal(); }
00238 __forceinline Vec3 GetNormal() const
00239 {
00240 __m128 mm4 = xyzw;
00241 __m128 mm1 = _mm_mul_ps(mm4, mm4);
00242 __m128 mm2 = TA_MM_SHUFFLE(mm1, _MM_SHUFFLE(3, 3, 3, 1));
00243 __m128 mm3 = TA_MM_SHUFFLE(mm1, _MM_SHUFFLE(3, 3, 3, 2));
00244 mm1 = _mm_add_ss(mm1, mm2);
00245 mm1 = _mm_add_ss(mm1, mm3);
00246 mm1 = _mm_rsqrt_ss(mm1);
00247 mm1 = TA_MM_SHUFFLE(mm1, _MM_SHUFFLE(0, 0, 0, 0));
00248 mm4 = _mm_mul_ps(mm4, mm1);
00249
00250 return (Vec3&)mm4;
00251 }
00252 __forceinline void Normalise()
00253 {
00254 __m128 mm4 = xyzw;
00255 __m128 mm1 = _mm_mul_ps(mm4, mm4);
00256 __m128 mm2 = TA_MM_SHUFFLE(mm1, _MM_SHUFFLE(3, 3, 3, 1));
00257 __m128 mm3 = TA_MM_SHUFFLE(mm1, _MM_SHUFFLE(3, 3, 3, 2));
00258 mm1 = _mm_add_ss(mm1, mm2);
00259 mm1 = _mm_add_ss(mm1, mm3);
00260 mm1 = _mm_rsqrt_ss(mm1);
00261 mm1 = TA_MM_SHUFFLE(mm1, _MM_SHUFFLE(0, 0, 0, 0));
00262 mm4 = _mm_mul_ps(mm4, mm1);
00263
00264 xyzw = mm4;
00265 }
00266
00267
00268 __forceinline void Clear() { (__m128&)*this = _mm_setzero_ps(); }
00269 __forceinline bool IsNormalised() const { float fMag = GetMagnitudeSqrd(); return IsEqualToOneWithInError(fMag); }
00270 __forceinline bool IsZero() const { return GetMagnitudeSqrd() == 0.0f; }
00271 __forceinline int GetGreatestAxis() const;
00272 __forceinline void GetAxisOrder(int pnAxisOrder[3]) const;
00273 __forceinline float GetAxis(int nIndex) const { return (*this)[nIndex]; }
00274 __forceinline static const Vec3& TA_VEC3_CALL GetUnitVector(int nIndex);
00275 inline bool IsEqualWithInError(const Vec3& v3Value, float fError) const;
00276 inline bool IsValid() const { return FloatIsOK(x) && FloatIsOK(y) && FloatIsOK(z); }
00277 };
00278
00279 const Vec3 k_v3Zero(0.0f, 0.0f, 0.0f);
00280 const Vec3 k_v3UnitX(1.0f, 0.0f, 0.0f);
00281 const Vec3 k_v3UnitY(0.0f, 1.0f, 0.0f);
00282 const Vec3 k_v3UnitZ(0.0f, 0.0f, 1.0f);
00283
00284 }
00285
00286 #include "Vec3SSE.inl"
00287
00288 #endif // DOXYGEN
00289
© Copyright 2004-2006 TRUE AXIS PTY LTD Australia. All rights reserved.