$treeview $search $mathjax
Eigen
3.2.5
$projectbrief
|
$projectbrief
|
$searchbox |
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> 00005 // 00006 // This Source Code Form is subject to the terms of the Mozilla 00007 // Public License v. 2.0. If a copy of the MPL was not distributed 00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00009 00010 #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H 00011 #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H 00012 00013 namespace Eigen { 00014 00015 namespace internal { 00016 00017 // pack a selfadjoint block diagonal for use with the gebp_kernel 00018 template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder> 00019 struct symm_pack_lhs 00020 { 00021 template<int BlockRows> inline 00022 void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count) 00023 { 00024 // normal copy 00025 for(Index k=0; k<i; k++) 00026 for(Index w=0; w<BlockRows; w++) 00027 blockA[count++] = lhs(i+w,k); // normal 00028 // symmetric copy 00029 Index h = 0; 00030 for(Index k=i; k<i+BlockRows; k++) 00031 { 00032 for(Index w=0; w<h; w++) 00033 blockA[count++] = numext::conj(lhs(k, i+w)); // transposed 00034 00035 blockA[count++] = numext::real(lhs(k,k)); // real (diagonal) 00036 00037 for(Index w=h+1; w<BlockRows; w++) 00038 blockA[count++] = lhs(i+w, k); // normal 00039 ++h; 00040 } 00041 // transposed copy 00042 for(Index k=i+BlockRows; k<cols; k++) 00043 for(Index w=0; w<BlockRows; w++) 00044 blockA[count++] = numext::conj(lhs(k, i+w)); // transposed 00045 } 00046 void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows) 00047 { 00048 const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride); 00049 Index count = 0; 00050 Index peeled_mc = (rows/Pack1)*Pack1; 00051 for(Index i=0; i<peeled_mc; i+=Pack1) 00052 { 00053 pack<Pack1>(blockA, lhs, cols, i, count); 00054 } 00055 00056 if(rows-peeled_mc>=Pack2) 00057 { 00058 pack<Pack2>(blockA, lhs, cols, peeled_mc, count); 00059 peeled_mc += Pack2; 00060 } 00061 00062 // do the same with mr==1 00063 for(Index i=peeled_mc; i<rows; i++) 00064 { 00065 for(Index k=0; k<i; k++) 00066 blockA[count++] = lhs(i, k); // normal 00067 00068 blockA[count++] = numext::real(lhs(i, i)); // real (diagonal) 00069 00070 for(Index k=i+1; k<cols; k++) 00071 blockA[count++] = numext::conj(lhs(k, i)); // transposed 00072 } 00073 } 00074 }; 00075 00076 template<typename Scalar, typename Index, int nr, int StorageOrder> 00077 struct symm_pack_rhs 00078 { 00079 enum { PacketSize = packet_traits<Scalar>::size }; 00080 void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2) 00081 { 00082 Index end_k = k2 + rows; 00083 Index count = 0; 00084 const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride); 00085 Index packet_cols = (cols/nr)*nr; 00086 00087 // first part: normal case 00088 for(Index j2=0; j2<k2; j2+=nr) 00089 { 00090 for(Index k=k2; k<end_k; k++) 00091 { 00092 blockB[count+0] = rhs(k,j2+0); 00093 blockB[count+1] = rhs(k,j2+1); 00094 if (nr==4) 00095 { 00096 blockB[count+2] = rhs(k,j2+2); 00097 blockB[count+3] = rhs(k,j2+3); 00098 } 00099 count += nr; 00100 } 00101 } 00102 00103 // second part: diagonal block 00104 for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr) 00105 { 00106 // again we can split vertically in three different parts (transpose, symmetric, normal) 00107 // transpose 00108 for(Index k=k2; k<j2; k++) 00109 { 00110 blockB[count+0] = numext::conj(rhs(j2+0,k)); 00111 blockB[count+1] = numext::conj(rhs(j2+1,k)); 00112 if (nr==4) 00113 { 00114 blockB[count+2] = numext::conj(rhs(j2+2,k)); 00115 blockB[count+3] = numext::conj(rhs(j2+3,k)); 00116 } 00117 count += nr; 00118 } 00119 // symmetric 00120 Index h = 0; 00121 for(Index k=j2; k<j2+nr; k++) 00122 { 00123 // normal 00124 for (Index w=0 ; w<h; ++w) 00125 blockB[count+w] = rhs(k,j2+w); 00126 00127 blockB[count+h] = numext::real(rhs(k,k)); 00128 00129 // transpose 00130 for (Index w=h+1 ; w<nr; ++w) 00131 blockB[count+w] = numext::conj(rhs(j2+w,k)); 00132 count += nr; 00133 ++h; 00134 } 00135 // normal 00136 for(Index k=j2+nr; k<end_k; k++) 00137 { 00138 blockB[count+0] = rhs(k,j2+0); 00139 blockB[count+1] = rhs(k,j2+1); 00140 if (nr==4) 00141 { 00142 blockB[count+2] = rhs(k,j2+2); 00143 blockB[count+3] = rhs(k,j2+3); 00144 } 00145 count += nr; 00146 } 00147 } 00148 00149 // third part: transposed 00150 for(Index j2=k2+rows; j2<packet_cols; j2+=nr) 00151 { 00152 for(Index k=k2; k<end_k; k++) 00153 { 00154 blockB[count+0] = numext::conj(rhs(j2+0,k)); 00155 blockB[count+1] = numext::conj(rhs(j2+1,k)); 00156 if (nr==4) 00157 { 00158 blockB[count+2] = numext::conj(rhs(j2+2,k)); 00159 blockB[count+3] = numext::conj(rhs(j2+3,k)); 00160 } 00161 count += nr; 00162 } 00163 } 00164 00165 // copy the remaining columns one at a time (=> the same with nr==1) 00166 for(Index j2=packet_cols; j2<cols; ++j2) 00167 { 00168 // transpose 00169 Index half = (std::min)(end_k,j2); 00170 for(Index k=k2; k<half; k++) 00171 { 00172 blockB[count] = numext::conj(rhs(j2,k)); 00173 count += 1; 00174 } 00175 00176 if(half==j2 && half<k2+rows) 00177 { 00178 blockB[count] = numext::real(rhs(j2,j2)); 00179 count += 1; 00180 } 00181 else 00182 half--; 00183 00184 // normal 00185 for(Index k=half+1; k<k2+rows; k++) 00186 { 00187 blockB[count] = rhs(k,j2); 00188 count += 1; 00189 } 00190 } 00191 } 00192 }; 00193 00194 /* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of 00195 * the general matrix matrix product. 00196 */ 00197 template <typename Scalar, typename Index, 00198 int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs, 00199 int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs, 00200 int ResStorageOrder> 00201 struct product_selfadjoint_matrix; 00202 00203 template <typename Scalar, typename Index, 00204 int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs, 00205 int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs> 00206 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor> 00207 { 00208 00209 static EIGEN_STRONG_INLINE void run( 00210 Index rows, Index cols, 00211 const Scalar* lhs, Index lhsStride, 00212 const Scalar* rhs, Index rhsStride, 00213 Scalar* res, Index resStride, 00214 const Scalar& alpha) 00215 { 00216 product_selfadjoint_matrix<Scalar, Index, 00217 EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor, 00218 RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs), 00219 EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor, 00220 LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs), 00221 ColMajor> 00222 ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha); 00223 } 00224 }; 00225 00226 template <typename Scalar, typename Index, 00227 int LhsStorageOrder, bool ConjugateLhs, 00228 int RhsStorageOrder, bool ConjugateRhs> 00229 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor> 00230 { 00231 00232 static EIGEN_DONT_INLINE void run( 00233 Index rows, Index cols, 00234 const Scalar* _lhs, Index lhsStride, 00235 const Scalar* _rhs, Index rhsStride, 00236 Scalar* res, Index resStride, 00237 const Scalar& alpha); 00238 }; 00239 00240 template <typename Scalar, typename Index, 00241 int LhsStorageOrder, bool ConjugateLhs, 00242 int RhsStorageOrder, bool ConjugateRhs> 00243 EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run( 00244 Index rows, Index cols, 00245 const Scalar* _lhs, Index lhsStride, 00246 const Scalar* _rhs, Index rhsStride, 00247 Scalar* res, Index resStride, 00248 const Scalar& alpha) 00249 { 00250 Index size = rows; 00251 00252 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); 00253 const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); 00254 00255 typedef gebp_traits<Scalar,Scalar> Traits; 00256 00257 Index kc = size; // cache block size along the K direction 00258 Index mc = rows; // cache block size along the M direction 00259 Index nc = cols; // cache block size along the N direction 00260 computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc); 00261 // kc must smaller than mc 00262 kc = (std::min)(kc,mc); 00263 00264 std::size_t sizeW = kc*Traits::WorkSpaceFactor; 00265 std::size_t sizeB = sizeW + kc*cols; 00266 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); 00267 ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); 00268 Scalar* blockB = allocatedBlockB + sizeW; 00269 00270 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; 00271 symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; 00272 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; 00273 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed; 00274 00275 for(Index k2=0; k2<size; k2+=kc) 00276 { 00277 const Index actual_kc = (std::min)(k2+kc,size)-k2; 00278 00279 // we have selected one row panel of rhs and one column panel of lhs 00280 // pack rhs's panel into a sequential chunk of memory 00281 // and expand each coeff to a constant packet for further reuse 00282 pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols); 00283 00284 // the select lhs's panel has to be split in three different parts: 00285 // 1 - the transposed panel above the diagonal block => transposed packed copy 00286 // 2 - the diagonal block => special packed copy 00287 // 3 - the panel below the diagonal block => generic packed copy 00288 for(Index i2=0; i2<k2; i2+=mc) 00289 { 00290 const Index actual_mc = (std::min)(i2+mc,k2)-i2; 00291 // transposed packed copy 00292 pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc); 00293 00294 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); 00295 } 00296 // the block diagonal 00297 { 00298 const Index actual_mc = (std::min)(k2+kc,size)-k2; 00299 // symmetric packed copy 00300 pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc); 00301 00302 gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); 00303 } 00304 00305 for(Index i2=k2+kc; i2<size; i2+=mc) 00306 { 00307 const Index actual_mc = (std::min)(i2+mc,size)-i2; 00308 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>() 00309 (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc); 00310 00311 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); 00312 } 00313 } 00314 } 00315 00316 // matrix * selfadjoint product 00317 template <typename Scalar, typename Index, 00318 int LhsStorageOrder, bool ConjugateLhs, 00319 int RhsStorageOrder, bool ConjugateRhs> 00320 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor> 00321 { 00322 00323 static EIGEN_DONT_INLINE void run( 00324 Index rows, Index cols, 00325 const Scalar* _lhs, Index lhsStride, 00326 const Scalar* _rhs, Index rhsStride, 00327 Scalar* res, Index resStride, 00328 const Scalar& alpha); 00329 }; 00330 00331 template <typename Scalar, typename Index, 00332 int LhsStorageOrder, bool ConjugateLhs, 00333 int RhsStorageOrder, bool ConjugateRhs> 00334 EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run( 00335 Index rows, Index cols, 00336 const Scalar* _lhs, Index lhsStride, 00337 const Scalar* _rhs, Index rhsStride, 00338 Scalar* res, Index resStride, 00339 const Scalar& alpha) 00340 { 00341 Index size = cols; 00342 00343 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); 00344 00345 typedef gebp_traits<Scalar,Scalar> Traits; 00346 00347 Index kc = size; // cache block size along the K direction 00348 Index mc = rows; // cache block size along the M direction 00349 Index nc = cols; // cache block size along the N direction 00350 computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc); 00351 std::size_t sizeW = kc*Traits::WorkSpaceFactor; 00352 std::size_t sizeB = sizeW + kc*cols; 00353 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); 00354 ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); 00355 Scalar* blockB = allocatedBlockB + sizeW; 00356 00357 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; 00358 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; 00359 symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; 00360 00361 for(Index k2=0; k2<size; k2+=kc) 00362 { 00363 const Index actual_kc = (std::min)(k2+kc,size)-k2; 00364 00365 pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2); 00366 00367 // => GEPP 00368 for(Index i2=0; i2<rows; i2+=mc) 00369 { 00370 const Index actual_mc = (std::min)(i2+mc,rows)-i2; 00371 pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc); 00372 00373 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); 00374 } 00375 } 00376 } 00377 00378 } // end namespace internal 00379 00380 /*************************************************************************** 00381 * Wrapper to product_selfadjoint_matrix 00382 ***************************************************************************/ 00383 00384 namespace internal { 00385 template<typename Lhs, int LhsMode, typename Rhs, int RhsMode> 00386 struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> > 00387 : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> > 00388 {}; 00389 } 00390 00391 template<typename Lhs, int LhsMode, typename Rhs, int RhsMode> 00392 struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> 00393 : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs > 00394 { 00395 EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) 00396 00397 SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} 00398 00399 enum { 00400 LhsIsUpper = (LhsMode&(Upper|Lower))==Upper, 00401 LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint, 00402 RhsIsUpper = (RhsMode&(Upper|Lower))==Upper, 00403 RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint 00404 }; 00405 00406 template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const 00407 { 00408 eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); 00409 00410 typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); 00411 typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); 00412 00413 Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) 00414 * RhsBlasTraits::extractScalarFactor(m_rhs); 00415 00416 internal::product_selfadjoint_matrix<Scalar, Index, 00417 EIGEN_LOGICAL_XOR(LhsIsUpper, 00418 internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, 00419 NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), 00420 EIGEN_LOGICAL_XOR(RhsIsUpper, 00421 internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint, 00422 NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)), 00423 internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor> 00424 ::run( 00425 lhs.rows(), rhs.cols(), // sizes 00426 &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info 00427 &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info 00428 &dst.coeffRef(0,0), dst.outerStride(), // result info 00429 actualAlpha // alpha 00430 ); 00431 } 00432 }; 00433 00434 } // end namespace Eigen 00435 00436 #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H