15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_ATANPACK_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_ATANPACK_H__ 22 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/AtanReg.h> 25 #include <boost/type_traits/is_float.hpp> 26 #include <IPSDKUtil/InstructionSet/Pack.h> 54 template <eInstructionSet::domain instructionSet,
typename T,
63 typename boost::enable_if<boost::is_float<T> >::type>
// Partial specialization of AtanPack for an instruction set IS and an
// element type T. The third template argument is an enable_if on
// boost::is_float<T>, so this specialization only participates when T is a
// floating-point type.
86 template <eInstructionSet::domain IS,
typename T>
87 struct AtanPack<IS, T,
88 typename
boost::enable_if<boost::is_float<T> >::type>
// Returns the literal below converted to T. act() broadcasts it into
// csteUPack[0], the first coefficient consumed by the Horner-style
// evaluation of the numerator polynomial.
92 static IPSDK_FORCEINLINE
const T g_u0() {
return static_cast<T
>(-8.750608600031904122785E-1);}
// Returns the literal below converted to T. act() broadcasts it into
// csteUPack[1], the second numerator coefficient.
93 static IPSDK_FORCEINLINE
const T g_u1() {
return static_cast<T
>(-1.615753718733365076637E1);}
// Returns the literal below converted to T. act() broadcasts it into
// csteUPack[2], the third numerator coefficient.
94 static IPSDK_FORCEINLINE
const T g_u2() {
return static_cast<T
>(-7.500855792314704667340E1);}
// Returns the literal below converted to T. act() broadcasts it into
// csteUPack[3], the fourth numerator coefficient.
95 static IPSDK_FORCEINLINE
const T g_u3() {
return static_cast<T
>(-1.228866684490136173410E2);}
// Returns the literal below converted to T. act() broadcasts it into
// csteUPack[4], the last numerator coefficient.
96 static IPSDK_FORCEINLINE
const T g_u4() {
return static_cast<T
>(-6.485021904942025371773E1);}
// Returns the literal below converted to T. act() broadcasts it into
// csteVPack[0], the first constant added in the evaluation of the
// denominator polynomial (whose leading coefficient is an implicit 1).
97 static IPSDK_FORCEINLINE
const T g_v0() {
return static_cast<T
>(2.485846490142306297962E1);}
// Returns the literal below converted to T. act() broadcasts it into
// csteVPack[1], the second denominator coefficient.
98 static IPSDK_FORCEINLINE
const T g_v1() {
return static_cast<T
>(1.650270098316988542046E2);}
// Returns the literal below converted to T. act() broadcasts it into
// csteVPack[2], the third denominator coefficient.
99 static IPSDK_FORCEINLINE
const T g_v2() {
return static_cast<T
>(4.328810604912902668951E2);}
// Returns the literal below converted to T. act() broadcasts it into
// csteVPack[3], the fourth denominator coefficient.
100 static IPSDK_FORCEINLINE
const T g_v3() {
return static_cast<T
>(4.853903996359136964868E2);}
// Returns the literal below converted to T. act() broadcasts it into
// csteVPack[4], the last denominator coefficient.
101 static IPSDK_FORCEINLINE
const T g_v4() {
return static_cast<T
>(1.945506571482613964425E2);}
// Value-returning overload of act(): takes the input pack by const reference
// and returns a pack of the same type by value.
// NOTE(review): only the declaration of the local result (outPack) is visible
// in this listing; the rest of the body is not shown. Presumably it forwards
// to the two-argument act(xPack, outPack) and returns outPack - confirm
// against the full header.
106 const typename IS2Pack<IS, T>::Type
107 act(
const typename IS2Pack<IS, T>::Type& xPack)
109 typename IS2Pack<IS, T>::Type outPack;
// Out-parameter overload of act(): reads xPack and writes the result into
// outPack. Judging by the struct name and the constants used (pi/2, pi/4,
// tan(3*pi/8)), this evaluates an arctangent approximation on every element
// of the pack: the argument is made non-negative, reduced to a small range,
// fed through a rational polynomial in z = x*x, and the sign is restored.
// NOTE(review): the return type/qualifiers of this overload and the
// surrounding braces are not visible in this listing.
// NOTE(review): comments below assume simd::ifElse<IS>(mask, a, b, out)
// writes a where the mask is set and b elsewhere, and that
// simd::isGreater<IS>(a, b, mask) sets the mask where a > b - confirm.
118 act(
const typename IS2Pack<IS, T>::Type& xPack,
119 typename IS2Pack<IS, T>::Type& outPack)
// Local shorthands for the value pack and the comparison-mask pack types.
122 typedef typename IS2Pack<IS, T>::Type PackT;
123 typedef typename IS2MaskPack<IS, T>::Type MaskPackT;
// Broadcast the numerator (U) and denominator (V) coefficients into packs.
124 PackT csteUPack[5], csteVPack[5];
125 simd::assign<IS>(csteUPack[0], g_u0());
126 simd::assign<IS>(csteUPack[1], g_u1());
127 simd::assign<IS>(csteUPack[2], g_u2());
128 simd::assign<IS>(csteUPack[3], g_u3());
129 simd::assign<IS>(csteUPack[4], g_u4());
130 simd::assign<IS>(csteVPack[0], g_v0());
131 simd::assign<IS>(csteVPack[1], g_v1());
132 simd::assign<IS>(csteVPack[2], g_v2());
133 simd::assign<IS>(csteVPack[3], g_v3());
134 simd::assign<IS>(csteVPack[4], g_v4());
// Note: despite its name, 'epsilon' holds NumericLimits<T>::sqrt_epsilon().
137 const T epsilon = NumericLimits<T>::sqrt_epsilon();
// Broadcast the scalar constants used for sign handling and range selection.
138 PackT cste0Pack, csteEpsilonPack, cste0P66Pack, csteTan3PiD8Pack, cste1Pack, csteM1Pack, cstePiD2Pack, cstePiD4Pack;
139 simd::assign<IS>(cste0Pack, 0);
140 simd::assign<IS>(csteEpsilonPack, epsilon);
141 simd::assign<IS>(csteTan3PiD8Pack,
static_cast<T
>(2.41421356237309504880));
142 simd::assign<IS>(cste0P66Pack,
static_cast<T
>(0.66));
143 simd::assign<IS>(cste1Pack, 1);
144 simd::assign<IS>(csteM1Pack, -1);
145 simd::assign<IS>(cstePiD2Pack,
static_cast<T
>(M_PI_2));
146 simd::assign<IS>(cstePiD4Pack,
static_cast<T
>(M_PI_4));
// Sign extraction: signPack is +1 where x > 0 and -1 elsewhere (so x == 0
// also gets -1); usedXPack = x * sign is the non-negative working argument.
149 PackT signPack, usedXPack;
150 MaskPackT testX0Pack;
151 simd::isGreater<IS>(xPack, cste0Pack, testX0Pack);
152 simd::ifElse<IS>(testX0Pack, cste1Pack, csteM1Pack, signPack);
153 simd::mul<IS>(xPack, signPack, usedXPack);
// Range masks, both computed on the un-reduced argument:
// x > tan(3*pi/8) and x > 0.66.
156 MaskPackT testXTan3PiD8Pack, testX0P66Pack;
157 simd::isGreater<IS>(usedXPack, csteTan3PiD8Pack, testXTan3PiD8Pack);
158 simd::isGreater<IS>(usedXPack, cste0P66Pack, testX0P66Pack);
// Range reduction. Both candidate reduced arguments are computed for every
// lane, then the masks pick the applicable one together with its offset y.
161 PackT yPack, xM1Pack, xP1Pack, xM1DXP1Pack, m1DXPack;
162 MaskPackT testAbsXEpsilonPack;
// Middle-range candidate: (x - 1) / (x + 1).
163 simd::add<IS>(usedXPack, cste1Pack, xP1Pack);
164 simd::sub<IS>(usedXPack, cste1Pack, xM1Pack);
165 simd::div<IS>(xM1Pack, xP1Pack, xM1DXP1Pack);
// Large-range candidate: -1 / x. Lanes where x <= epsilon use 1 as divisor
// instead, which keeps the division away from zero; those lanes never select
// this candidate because they cannot exceed tan(3*pi/8).
166 simd::isGreater<IS>(usedXPack, csteEpsilonPack, testAbsXEpsilonPack);
167 simd::ifElse<IS>(testAbsXEpsilonPack, usedXPack, cste1Pack, m1DXPack);
168 simd::div<IS>(csteM1Pack, m1DXPack, m1DXPack);
// Selection order matters: usedXPack is overwritten here, after both
// candidates above were derived from its original value. The x > 0.66 choice
// is applied first and then overridden where x > tan(3*pi/8).
169 simd::ifElse<IS>(testX0P66Pack, xM1DXP1Pack, usedXPack, usedXPack);
170 simd::ifElse<IS>(testX0P66Pack, cstePiD4Pack, cste0Pack, yPack);
171 simd::ifElse<IS>(testXTan3PiD8Pack, m1DXPack, usedXPack, usedXPack);
172 simd::ifElse<IS>(testXTan3PiD8Pack, cstePiD2Pack, yPack, yPack);
// z = (reduced x)^2.
// NOTE(review): the declaration of zPack is not visible in this listing;
// presumably a PackT declared on an omitted line - confirm.
176 simd::mul<IS>(usedXPack, usedXPack, zPack);
// Numerator, Horner-style:
// pPack = z * ((((u0*z + u1)*z + u2)*z + u3)*z + u4).
179 PackT pPack, denomPack;
180 simd::mul<IS>(zPack, csteUPack[0], pPack);
181 simd::add<IS>(pPack, csteUPack[1], pPack);
182 simd::mul<IS>(pPack, zPack, pPack);
183 simd::add<IS>(pPack, csteUPack[2], pPack);
184 simd::mul<IS>(pPack, zPack, pPack);
185 simd::add<IS>(pPack, csteUPack[3], pPack);
186 simd::mul<IS>(pPack, zPack, pPack);
187 simd::add<IS>(pPack, csteUPack[4], pPack);
188 simd::mul<IS>(pPack, zPack, pPack);
// Denominator, Horner-style with an implicit leading coefficient of 1:
// denomPack = ((((z + v0)*z + v1)*z + v2)*z + v3)*z + v4.
189 simd::add<IS>(zPack, csteVPack[0], denomPack);
190 simd::mul<IS>(denomPack, zPack, denomPack);
191 simd::add<IS>(denomPack, csteVPack[1], denomPack);
192 simd::mul<IS>(denomPack, zPack, denomPack);
193 simd::add<IS>(denomPack, csteVPack[2], denomPack);
194 simd::mul<IS>(denomPack, zPack, denomPack);
195 simd::add<IS>(denomPack, csteVPack[3], denomPack);
196 simd::mul<IS>(denomPack, zPack, denomPack);
197 simd::add<IS>(denomPack, csteVPack[4], denomPack);
// Rational term: numerator / denominator.
198 simd::div<IS>(pPack, denomPack, pPack);
// pPack = x * ratio + x, using the reduced x.
201 simd::mul<IS>(pPack, usedXPack, pPack);
202 simd::add<IS>(pPack, usedXPack, pPack);
// Add the range offset (0, pi/4 or pi/2) chosen during reduction.
205 simd::add<IS>(yPack, pPack, yPack);
// Restore the sign of the original input and write the result.
208 simd::mul<IS>(yPack, signPack, outPack);
221 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_ATANPACK_H__ Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
AtanReg template specialization for instruction set Standard.
Definition: AtanReg.h:31
abs function; returns the absolute value of a pack
Definition: DataItemNodeHdrMacrosDetails.h:48
Definition: AtanPack.h:56
mul function; returns the product of 2 input pack operands
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
sqrt function; returns the square root of a pack
load function; loads a buffer into a pack
function assigning a given value of type T to a given Pack<instructionSet, T>
Predefined types associated to instruction set management.
div function; returns the quotient of 2 input pack operands
Definition of import/export macro for library.
sub function; returns the result of an arithmetic subtraction on all the elements of 2 input pack operands
unload function; unloads a pack into a memory buffer
Definition: UnaryPackOp.h:31
add function; returns the result of an arithmetic add operation on all the elements of 2 input pack operands
max function; returns the maximum of 2 packs
compiler optimisations only
Definition: InstructionSetTypes.h:34