$treeview $search $mathjax
Eigen-unsupported
3.2.5
$projectbrief
|
$projectbrief
|
$searchbox |
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
#define EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H

namespace Eigen {

namespace internal {

/** \internal
  * Generic fallback: forwards \a a to std::asin and returns the result.
  * Packet types with a dedicated implementation specialize this template.
  */
template<typename Packet> inline static Packet pasin(Packet a) { return std::asin(a); }

#ifdef EIGEN_VECTORIZE_SSE

/** \internal
  * SSE specialization computing the arcsine of each lane of \a x.
  *
  * The function works on a = |x| and re-applies the sign of \a x at the end.
  * Two candidate results are computed for every lane and blended with a
  * comparison mask, so there is no per-lane branching:
  *  - lanes with a >  0.5 use  pi/2 - 2*asin(sqrt((1 - a)/2)),
  *  - lanes with a <= 0.5 use the polynomial approximation directly,
  * where asin(t) is approximated as t + t*z*P(z) with z = t*t and P a
  * degree-4 polynomial in z (coefficients asin1..asin5, evaluated with
  * Horner's scheme).
  *
  * NOTE(review): lanes with |x| > 1 take the square root of a negative
  * number; presumably this yields NaN -- confirm against psqrt.
  */
template<> EIGEN_DONT_INLINE Packet4f pasin(Packet4f x)
{
  // Each macro below declares a local packet constant named p4f_<name>.
  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
  _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5);

  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);

  _EIGEN_DECLARE_CONST_Packet4f(pi_over_2, 3.141592654*0.5);

  // Polynomial coefficients, highest order first.
  _EIGEN_DECLARE_CONST_Packet4f(asin1, 4.2163199048E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin2, 2.4181311049E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin3, 4.5470025998E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin4, 7.4953002686E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin5, 1.6666752422E-1);

  // Absolute value of the input; the sign is handled separately.
  Packet4f a = pabs(x);

  // Keep only the sign bit of every lane so it can be re-applied to the result.
  Packet4f sign_bit = _mm_and_ps(x, p4f_sign_mask);

  // Results of the two evaluation paths.
  Packet4f z1, z2;

  // All-ones in lanes where a > 0.5, all-zeros elsewhere. Both paths are
  // evaluated for every lane; this mask selects which result is kept.
  Packet4f branch_mask = _mm_cmpgt_ps(a, p4f_half);

  // Path for a > 0.5.
  {
    // z1 = 0.5 - 0.5*a = (1 - a)/2, and x1 = sqrt(z1).
    z1 = pmadd(p4f_minus_half, a, p4f_half);
    Packet4f x1 = psqrt(z1);
    // Horner evaluation of P(z1).
    Packet4f s1 = pmadd(p4f_asin1, z1, p4f_asin2);
    Packet4f s2 = pmadd(s1, z1, p4f_asin3);
    Packet4f s3 = pmadd(s2, z1, p4f_asin4);
    Packet4f s4 = pmadd(s3, z1, p4f_asin5);
    // Plain multiply by z1 so that the following step can be a multiply-add.
    Packet4f temp = pmul(s4, z1);
    z1 = pmadd(temp, x1, x1);       // x1 + x1*z1*P(z1)
    z1 = padd(z1, z1);              // doubled
    z1 = psub(p4f_pi_over_2, z1);   // pi/2 - 2*(...)
  }

  // Path for a <= 0.5.
  {
    Packet4f x2 = a;
    z2 = pmul(x2, x2);
    // Horner evaluation of P(z2).
    Packet4f s1 = pmadd(p4f_asin1, z2, p4f_asin2);
    Packet4f s2 = pmadd(s1, z2, p4f_asin3);
    Packet4f s3 = pmadd(s2, z2, p4f_asin4);
    Packet4f s4 = pmadd(s3, z2, p4f_asin5);
    // Plain multiply by z2 so that the following step can be a multiply-add.
    Packet4f temp = pmul(s4, z2);
    z2 = pmadd(temp, x2, x2);       // x2 + x2*z2*P(z2)
  }

  /* select the correct result from the two branch evaluations */
  z1 = _mm_and_ps(branch_mask, z1);
  z2 = _mm_andnot_ps(branch_mask, z2);
  Packet4f z = _mm_or_ps(z1, z2);

  /* update the sign: the result was computed for |x|, so flip it where x was negative */
  return _mm_xor_ps(z, sign_bit);
}

#endif // EIGEN_VECTORIZE_SSE

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H