$treeview $search $mathjax
Eigen
3.2.5
$projectbrief
|
$projectbrief
|
$searchbox |
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com> 00005 // Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr> 00006 // 00007 // This Source Code Form is subject to the terms of the Mozilla 00008 // Public License v. 2.0. If a copy of the MPL was not distributed 00009 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00010 00011 #ifndef EIGEN_GEOMETRY_SSE_H 00012 #define EIGEN_GEOMETRY_SSE_H 00013 00014 namespace Eigen { 00015 00016 namespace internal { 00017 00018 template<class Derived, class OtherDerived> 00019 struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned> 00020 { 00021 static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) 00022 { 00023 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0,0,0,0x80000000)); 00024 Quaternion<float> res; 00025 __m128 a = _a.coeffs().template packet<Aligned>(0); 00026 __m128 b = _b.coeffs().template packet<Aligned>(0); 00027 __m128 flip1 = _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a,1,2,0,2), 00028 vec4f_swizzle1(b,2,0,1,2)),mask); 00029 __m128 flip2 = _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a,3,3,3,1), 00030 vec4f_swizzle1(b,0,1,2,1)),mask); 00031 pstore(&res.x(), 00032 _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)), 00033 _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0), 00034 vec4f_swizzle1(b,1,2,0,0))), 00035 _mm_add_ps(flip1,flip2))); 00036 return res; 00037 } 00038 }; 00039 00040 template<typename VectorLhs,typename VectorRhs> 00041 struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> 00042 { 00043 static inline typename plain_matrix_type<VectorLhs>::type 00044 run(const VectorLhs& lhs, const VectorRhs& rhs) 00045 { 00046 __m128 a = lhs.template packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0); 00047 __m128 b = rhs.template packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0); 00048 __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3)); 00049 __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3)); 00050 typename plain_matrix_type<VectorLhs>::type res; 00051 pstore(&res.x(),_mm_sub_ps(mul1,mul2)); 00052 return res; 00053 } 00054 }; 00055 00056 00057 00058 00059 template<class Derived, class OtherDerived> 00060 struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Aligned> 00061 { 00062 static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) 00063 { 00064 const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); 00065 00066 Quaternion<double> res; 00067 00068 const double* a = _a.coeffs().data(); 00069 Packet2d b_xy = _b.coeffs().template packet<Aligned>(0); 00070 Packet2d b_zw = _b.coeffs().template packet<Aligned>(2); 00071 Packet2d a_xx = pset1<Packet2d>(a[0]); 00072 Packet2d a_yy = pset1<Packet2d>(a[1]); 00073 Packet2d a_zz = pset1<Packet2d>(a[2]); 00074 Packet2d a_ww = pset1<Packet2d>(a[3]); 00075 00076 // two temporaries: 00077 Packet2d t1, t2; 00078 00079 /* 00080 * t1 = ww*xy + yy*zw 00081 * t2 = zz*xy - xx*zw 00082 * res.xy = t1 +/- swap(t2) 00083 */ 00084 t1 = padd(pmul(a_ww, b_xy), pmul(a_yy, b_zw)); 00085 t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw)); 00086 #ifdef EIGEN_VECTORIZE_SSE3 00087 EIGEN_UNUSED_VARIABLE(mask) 00088 pstore(&res.x(), _mm_addsub_pd(t1, preverse(t2))); 00089 #else 00090 pstore(&res.x(), padd(t1, pxor(mask,preverse(t2)))); 00091 #endif 00092 00093 /* 00094 * t1 = ww*zw - yy*xy 00095 * t2 = zz*zw + xx*xy 00096 * res.zw = t1 -/+ swap(t2) = swap( swap(t1) +/- t2) 00097 */ 00098 t1 = psub(pmul(a_ww, b_zw), pmul(a_yy, b_xy)); 00099 t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy)); 00100 #ifdef EIGEN_VECTORIZE_SSE3 00101 EIGEN_UNUSED_VARIABLE(mask) 00102 pstore(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); 00103 #else 00104 pstore(&res.z(), psub(t1, pxor(mask,preverse(t2)))); 00105 #endif 00106 00107 return res; 00108 } 00109 }; 00110 00111 } // end namespace internal 00112 00113 } // end namespace Eigen 00114 00115 #endif // EIGEN_GEOMETRY_SSE_H