IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
AtanPack.h
Go to the documentation of this file.
1 // AtanPack.h:
3 // -----------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_ATANPACK_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_ATANPACK_H__
17 
22 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/AtanReg.h>
25 #include <boost/type_traits/is_float.hpp>
26 #include <IPSDKUtil/InstructionSet/Pack.h>
40 
41 namespace ipsdk {
42 namespace simd {
43 namespace detail {
44 
47 
// Primary AtanPack template: declared here but never defined.
// Only the partial specializations below provide an implementation; they are
// selected through the trailing Enable parameter (defaulted to void), which
// each specialization fills with a boost::enable_if<...>::type condition.
// Instantiating AtanPack with a type no specialization accepts therefore
// fails at compile time (incomplete type).
54 template <eInstructionSet::domain instructionSet, typename T,
55  typename Enable=void>
56 struct AtanPack;
57 
59 
60 // Implementation for the standard instruction set and floating-point types.
   //
   // Enabled only when boost::is_float<T> holds (see the enable_if condition
   // below).
   // NOTE(review): this listing omits several lines of the original header
   // (the embedded numbering jumps over 62, 67-68, 70-71, 77-78 and 80-81), so
   // the 'struct AtanPack<...' specialization line, both member signatures and
   // both bodies are not visible here. Presumably the two members mirror the
   // act() overloads of the generic specialization further down and rely on
   // AtanReg.h (included above) — confirm against the real header.
61 template <typename T>
63  typename boost::enable_if<boost::is_float<T> >::type>
64 {
65  static
66  IPSDK_FORCEINLINE
69  {
72  }
73 
74  static
75  IPSDK_FORCEINLINE
76  void
79  {
82  }
83 };
84 
85 // Generic implementation (instruction set IS left open) for floating-point types.
   //
   // Enabled only when boost::is_float<T> holds. The element-wise arctangent is
   // approximated with a three-interval range reduction followed by a rational
   // polynomial in z = x*x, evaluated entirely with simd:: pack operations.
86 template <eInstructionSet::domain IS, typename T>
87 struct AtanPack<IS, T,
88  typename boost::enable_if<boost::is_float<T> >::type>
89 {
    // Coefficients of the rational approximation used by act(), cast to T.
    // g_u0..g_u4 are the numerator coefficients (highest degree first) and
    // g_v0..g_v4 the denominator coefficients; the denominator's leading
    // coefficient is an implicit 1 (see the first denominator step in act()).
    // NOTE(review): the values look like the Cephes atan P/Q tables — confirm.
92  static IPSDK_FORCEINLINE const T g_u0() {return static_cast<T>(-8.750608600031904122785E-1);}
93  static IPSDK_FORCEINLINE const T g_u1() {return static_cast<T>(-1.615753718733365076637E1);}
94  static IPSDK_FORCEINLINE const T g_u2() {return static_cast<T>(-7.500855792314704667340E1);}
95  static IPSDK_FORCEINLINE const T g_u3() {return static_cast<T>(-1.228866684490136173410E2);}
96  static IPSDK_FORCEINLINE const T g_u4() {return static_cast<T>(-6.485021904942025371773E1);}
97  static IPSDK_FORCEINLINE const T g_v0() {return static_cast<T>(2.485846490142306297962E1);}
98  static IPSDK_FORCEINLINE const T g_v1() {return static_cast<T>(1.650270098316988542046E2);}
99  static IPSDK_FORCEINLINE const T g_v2() {return static_cast<T>(4.328810604912902668951E2);}
100  static IPSDK_FORCEINLINE const T g_v3() {return static_cast<T>(4.853903996359136964868E2);}
101  static IPSDK_FORCEINLINE const T g_v4() {return static_cast<T>(1.945506571482613964425E2);}
103 
    // Convenience overload: evaluates the two-argument act() into a local pack
    // and returns that pack by value.
    //   xPack : input pack
    //   return: pack filled by act(xPack, outPack)
104  static
105  IPSDK_FORCEINLINE
106  const typename IS2Pack<IS, T>::Type
107  act(const typename IS2Pack<IS, T>::Type& xPack)
108  {
109  typename IS2Pack<IS, T>::Type outPack;
110  act(xPack, outPack);
111 
112  return outPack;
113  }
114 
    // Writes the element-wise arctangent approximation of xPack into outPack.
    //   xPack   : input pack (read only)
    //   outPack : output pack, fully overwritten by the last statement
    //
    // Steps, all performed lane-wise:
    //   1. sign  = (x > 0) ? 1 : -1,  usedX = x * sign
    //   2. range reduction on usedX:
    //        usedX > tan(3*pi/8) : usedX <- -1/usedX,             y = pi/2
    //        usedX > 0.66        : usedX <- (usedX-1)/(usedX+1),  y = pi/4
    //        otherwise           : usedX unchanged,               y = 0
    //   3. z = usedX*usedX,  p = z * U(z) / V(z)
    //   4. outPack = sign * (y + usedX*p + usedX)
115  static
116  IPSDK_FORCEINLINE
117  void
118  act(const typename IS2Pack<IS, T>::Type& xPack,
119  typename IS2Pack<IS, T>::Type& outPack)
120  {
121  // broadcast the polynomial coefficients into packs (index 0 = highest degree)
122  typedef typename IS2Pack<IS, T>::Type PackT;
123  typedef typename IS2MaskPack<IS, T>::Type MaskPackT;
124  PackT csteUPack[5], csteVPack[5];
125  simd::assign<IS>(csteUPack[0], g_u0());
126  simd::assign<IS>(csteUPack[1], g_u1());
127  simd::assign<IS>(csteUPack[2], g_u2());
128  simd::assign<IS>(csteUPack[3], g_u3());
129  simd::assign<IS>(csteUPack[4], g_u4());
130  simd::assign<IS>(csteVPack[0], g_v0());
131  simd::assign<IS>(csteVPack[1], g_v1());
132  simd::assign<IS>(csteVPack[2], g_v2());
133  simd::assign<IS>(csteVPack[3], g_v3());
134  simd::assign<IS>(csteVPack[4], g_v4());
135 
136  // broadcast the scalar constants used by the range reduction
137  const T epsilon = NumericLimits<T>::sqrt_epsilon(); // lower bound guarding the -1/x division below
138  PackT cste0Pack, csteEpsilonPack, cste0P66Pack, csteTan3PiD8Pack, cste1Pack, csteM1Pack, cstePiD2Pack, cstePiD4Pack;
139  simd::assign<IS>(cste0Pack, 0);
140  simd::assign<IS>(csteEpsilonPack, epsilon);
141  simd::assign<IS>(csteTan3PiD8Pack, static_cast<T>(2.41421356237309504880)); // tan( 3*pi/8 ) = 2.41421356237309504880
142  simd::assign<IS>(cste0P66Pack, static_cast<T>(0.66));
143  simd::assign<IS>(cste1Pack, 1);
144  simd::assign<IS>(csteM1Pack, -1);
145  simd::assign<IS>(cstePiD2Pack, static_cast<T>(M_PI_2));
146  simd::assign<IS>(cstePiD4Pack, static_cast<T>(M_PI_4));
147 
148  // split the input into a sign (+1 where x > 0, -1 in every other lane, zero included)
     // and the product x * sign, which the rest of the function works on
149  PackT signPack, usedXPack;
150  MaskPackT testX0Pack;
151  simd::isGreater<IS>(xPack, cste0Pack, testX0Pack);
152  simd::ifElse<IS>(testX0Pack, cste1Pack, csteM1Pack, signPack);
153  simd::mul<IS>(xPack, signPack, usedXPack);
154 
155  // classify each lane against the two range thresholds; both masks are taken
     // from the unreduced usedXPack, before it is overwritten below
156  MaskPackT testXTan3PiD8Pack, testX0P66Pack;
157  simd::isGreater<IS>(usedXPack, csteTan3PiD8Pack, testXTan3PiD8Pack);
158  simd::isGreater<IS>(usedXPack, cste0P66Pack, testX0P66Pack);
159 
160  // compute both candidate reduced arguments, then select the reduced x and the
     // matching y offset per lane. Statement order matters: the candidates are
     // built from the unreduced usedXPack, and the tan(3*pi/8) selection runs
     // last so that it overrides the 0.66 selection in lanes where both masks hold.
161  PackT yPack, xM1Pack, xP1Pack, xM1DXP1Pack, m1DXPack;
162  MaskPackT testAbsXEpsilonPack;
163  simd::add<IS>(usedXPack, cste1Pack, xP1Pack);
164  simd::sub<IS>(usedXPack, cste1Pack, xM1Pack);
165  simd::div<IS>(xM1Pack, xP1Pack, xM1DXP1Pack); // (x - 1) / (x + 1)
     // lanes not greater than epsilon are replaced by 1 before computing -1/x;
     // that quotient is only kept where usedX > tan(3*pi/8), so the substitution
     // never reaches the result (presumably it only avoids dividing by ~0)
166  simd::isGreater<IS>(usedXPack, csteEpsilonPack, testAbsXEpsilonPack);
167  simd::ifElse<IS>(testAbsXEpsilonPack, usedXPack, cste1Pack, m1DXPack);
168  simd::div<IS>(csteM1Pack, m1DXPack, m1DXPack); // -1 / x
169  simd::ifElse<IS>(testX0P66Pack, xM1DXP1Pack, usedXPack, usedXPack);
170  simd::ifElse<IS>(testX0P66Pack, cstePiD4Pack, cste0Pack, yPack);
171  simd::ifElse<IS>(testXTan3PiD8Pack, m1DXPack, usedXPack, usedXPack);
172  simd::ifElse<IS>(testXTan3PiD8Pack, cstePiD2Pack, yPack, yPack);
173 
174  // z = x * x (x being the reduced argument)
175  PackT zPack;
176  simd::mul<IS>(usedXPack, usedXPack, zPack);
177 
178  // p = z * P(U, z) / P(V, z), both polynomials evaluated with Horner's scheme:
     //   numerator   = z * (u0*z^4 + u1*z^3 + u2*z^2 + u3*z + u4)
     //   denominator = z^5 + v0*z^4 + v1*z^3 + v2*z^2 + v3*z + v4
179  PackT pPack, denomPack;
180  simd::mul<IS>(zPack, csteUPack[0], pPack);
181  simd::add<IS>(pPack, csteUPack[1], pPack);
182  simd::mul<IS>(pPack, zPack, pPack);
183  simd::add<IS>(pPack, csteUPack[2], pPack);
184  simd::mul<IS>(pPack, zPack, pPack);
185  simd::add<IS>(pPack, csteUPack[3], pPack);
186  simd::mul<IS>(pPack, zPack, pPack);
187  simd::add<IS>(pPack, csteUPack[4], pPack);
188  simd::mul<IS>(pPack, zPack, pPack);
189  simd::add<IS>(zPack, csteVPack[0], denomPack); // implicit leading coefficient 1
190  simd::mul<IS>(denomPack, zPack, denomPack);
191  simd::add<IS>(denomPack, csteVPack[1], denomPack);
192  simd::mul<IS>(denomPack, zPack, denomPack);
193  simd::add<IS>(denomPack, csteVPack[2], denomPack);
194  simd::mul<IS>(denomPack, zPack, denomPack);
195  simd::add<IS>(denomPack, csteVPack[3], denomPack);
196  simd::mul<IS>(denomPack, zPack, denomPack);
197  simd::add<IS>(denomPack, csteVPack[4], denomPack);
198  simd::div<IS>(pPack, denomPack, pPack);
199 
200  // p = x * p + x;
201  simd::mul<IS>(pPack, usedXPack, pPack);
202  simd::add<IS>(pPack, usedXPack, pPack);
203 
204  // y = y + p (add the range-reduction offset selected above)
205  simd::add<IS>(yPack, pPack, yPack);
206 
207  // restore the sign of the original input: out = y * sign
208  simd::mul<IS>(yPack, signPack, outPack);
209  }
210 };
211 
213 
216 
217 } // end of namespace detail
218 } // end of namespace simd
219 } // end of namespace ipsdk
220 
221 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_ATANPACK_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
AtanReg template specialization for instruction set Standard.
Definition: AtanReg.h:31
abs function; returns the absolute value of a pack
Definition: DataItemNodeHdrMacrosDetails.h:48
Definition: AtanPack.h:56
mul function; returns the product of 2 input pack operands
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
sqrt function; returns the square root of a pack
load function; loads a buffer into a pack
function assigning a given value of type T to a given Pack<instructionSet, T>
Predefined types associated to instruction set management.
div function; returns the quotient of 2 input pack operands
Definition of import/export macro for library.
sub function; returns the result of an arithmetic subtraction on all the elements of 2 input pack op...
unload function; unloads a pack into a memory buffer
Definition: UnaryPackOp.h:31
add function; returns the result of an arithmetic add operation on all the elements of 2 input pack o...
max function; returns the maximum of 2 packs
compiler optimisations only
Definition: InstructionSetTypes.h:34
Definition: IS2Pack.h:34