$treeview $search $mathjax
Eigen
3.2.5
$projectbrief
|
$projectbrief
|
$searchbox |
00001 /* 00002 Copyright (c) 2011, Intel Corporation. All rights reserved. 00003 00004 Redistribution and use in source and binary forms, with or without modification, 00005 are permitted provided that the following conditions are met: 00006 00007 * Redistributions of source code must retain the above copyright notice, this 00008 list of conditions and the following disclaimer. 00009 * Redistributions in binary form must reproduce the above copyright notice, 00010 this list of conditions and the following disclaimer in the documentation 00011 and/or other materials provided with the distribution. 00012 * Neither the name of Intel Corporation nor the names of its contributors may 00013 be used to endorse or promote products derived from this software without 00014 specific prior written permission. 00015 00016 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 00017 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 00018 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 00019 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 00020 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 00021 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00022 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 00023 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00024 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00025 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00026 00027 ******************************************************************************** 00028 * Content : Eigen bindings to Intel(R) MKL 00029 * MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin() 00030 ******************************************************************************** 00031 */ 00032 00033 #ifndef EIGEN_ASSIGN_VML_H 00034 #define EIGEN_ASSIGN_VML_H 00035 00036 namespace Eigen { 00037 00038 namespace internal { 00039 00040 template<typename Op> struct vml_call 00041 { enum { IsSupported = 0 }; }; 00042 00043 template<typename Dst, typename Src, typename UnaryOp> 00044 class vml_assign_traits 00045 { 00046 private: 00047 enum { 00048 DstHasDirectAccess = Dst::Flags & DirectAccessBit, 00049 SrcHasDirectAccess = Src::Flags & DirectAccessBit, 00050 00051 StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), 00052 InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) 00053 : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime) 00054 : int(Dst::RowsAtCompileTime), 00055 InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) 00056 : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) 00057 : int(Dst::MaxRowsAtCompileTime), 00058 MaxSizeAtCompileTime = Dst::SizeAtCompileTime, 00059 00060 MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess 00061 && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, 00062 MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), 00063 VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize, 00064 LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD, 00065 MayEnableVml = MightEnableVml && LargeEnough, 00066 MayLinearize = MayEnableVml && MightLinearize 00067 }; 00068 public: 00069 enum { 00070 Traversal = MayLinearize ? LinearVectorizedTraversal 00071 : MayEnableVml ? InnerVectorizedTraversal 00072 : DefaultTraversal 00073 }; 00074 }; 00075 00076 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling, 00077 int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal > 00078 struct vml_assign_impl 00079 : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn> 00080 { 00081 }; 00082 00083 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling> 00084 struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal> 00085 { 00086 typedef typename Derived1::Scalar Scalar; 00087 typedef typename Derived1::Index Index; 00088 static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src) 00089 { 00090 // in case we want to (or have to) skip VML at runtime we can call: 00091 // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src); 00092 const Index innerSize = dst.innerSize(); 00093 const Index outerSize = dst.outerSize(); 00094 for(Index outer = 0; outer < outerSize; ++outer) { 00095 const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : 00096 &(src.nestedExpression().coeffRef(0, outer)); 00097 Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); 00098 vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr ); 00099 } 00100 } 00101 }; 00102 00103 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling> 00104 struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal> 00105 { 00106 static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src) 00107 { 00108 // in case we want to (or have to) skip VML at runtime we can call: 00109 // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src); 00110 vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() ); 00111 } 00112 }; 00113 00114 // Macroses 00115 00116 #define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \ 00117 template<typename Derived1, typename Derived2, typename UnaryOp> \ 00118 struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \ 00119 static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \ 00120 vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \ 00121 } \ 00122 }; 00123 00124 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling) 00125 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling) 00126 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling) 00127 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling) 00128 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling) 00129 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling) 00130 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling) 00131 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling) 00132 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling) 00133 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling) 00134 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling) 00135 00136 00137 #if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1) 00138 #define EIGEN_MKL_VML_MODE VML_HA 00139 #else 00140 #define EIGEN_MKL_VML_MODE VML_LA 00141 #endif 00142 00143 #define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ 00144 template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \ 00145 enum { IsSupported = 1 }; \ 00146 static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \ 00147 int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ 00148 VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \ 00149 } \ 00150 }; 00151 00152 #define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ 00153 template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \ 00154 enum { IsSupported = 1 }; \ 00155 static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \ 00156 int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ 00157 MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ 00158 VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \ 00159 } \ 00160 }; 00161 00162 #define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ 00163 template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \ 00164 enum { IsSupported = 1 }; \ 00165 static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \ 00166 int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ 00167 EIGENTYPE exponent = func.m_exponent; \ 00168 MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ 00169 VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \ 00170 (VMLTYPE*)dst, &vmlMode); \ 00171 } \ 00172 }; 00173 00174 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ 00175 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \ 00176 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double) 00177 00178 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \ 00179 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \ 00180 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16) 00181 00182 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \ 00183 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ 00184 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) 00185 00186 00187 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ 00188 EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \ 00189 EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double) 00190 00191 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \ 00192 EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \ 00193 EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16) 00194 00195 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \ 00196 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ 00197 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) 00198 00199 00200 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin) 00201 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin) 00202 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos) 00203 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos) 00204 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan) 00205 //EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs) 00206 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp) 00207 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln) 00208 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt) 00209 00210 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr) 00211 00212 // The vm*powx functions are not avaibale in the windows version of MKL. 00213 #ifndef _WIN32 00214 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float) 00215 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double) 00216 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8) 00217 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16) 00218 #endif 00219 00220 } // end namespace internal 00221 00222 } // end namespace Eigen 00223 00224 #endif // EIGEN_ASSIGN_VML_H