IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
AsinReg.h
Go to the documentation of this file.
1 // AsinReg.h:
3 // -------------------
4 //
15 
16 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_ASINREG_H__
17 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_ASINREG_H__
18 
30 
32 
33 namespace ipsdk {
34 namespace simd {
35 namespace detail {
36 
39 
// Partial specialization of AsinReg, enabled through boost::enable_if when the
// instruction set IS differs from eInstructionSet::eIS_Standard and T is a
// floating-point type (boost::is_float<T>).
// It evaluates asin on a RegType<IS, T>::Type register using two rational
// polynomial approximations; the one applied to each element is chosen by a
// comparison against 0.625.
41 template <eInstructionSet::domain IS, typename T>
42 struct AsinReg<
43  IS,
44  T,
45  typename boost::enable_if<
46  typename boost::mpl::and_<
47  typename boost::mpl::not_equal_to<
48  boost::mpl::int_<IS>,
49  boost::mpl::int_<eInstructionSet::eIS_Standard>
50  >::type,
51  typename boost::is_float<T>::type
52  >::type
53  >::type
54 >
55 {
    // Polynomial coefficients, each cast to T when read.
    //   g_p0..g_p5 / g_q0..g_q4 : numerator / denominator coefficients that
    //                             act() pairs with the "range test false" case.
    //   g_r0..g_r4 / g_s0..g_s3 : numerator / denominator coefficients that
    //                             act() pairs with the "range test true" case.
    // NOTE(review): the values look like the Cephes double-precision asin
    // tables (P, Q, R, S) — confirm against that source before editing them.
58  static IPSDK_FORCEINLINE const T g_p0() { return static_cast<T>(4.253011369004428248960E-3); }
59  static IPSDK_FORCEINLINE const T g_p1() { return static_cast<T>(-6.019598008014123785661E-1); }
60  static IPSDK_FORCEINLINE const T g_p2() { return static_cast<T>(5.444622390564711410273E0); }
61  static IPSDK_FORCEINLINE const T g_p3() { return static_cast<T>(-1.626247967210700244449E1); }
62  static IPSDK_FORCEINLINE const T g_p4() { return static_cast<T>(1.956261983317594739197E1); }
63  static IPSDK_FORCEINLINE const T g_p5() { return static_cast<T>(-8.198089802484824371615E0); }
64  static IPSDK_FORCEINLINE const T g_q0() { return static_cast<T>(-1.474091372988853791896E1); }
65  static IPSDK_FORCEINLINE const T g_q1() { return static_cast<T>(7.049610280856842141659E1); }
66  static IPSDK_FORCEINLINE const T g_q2() { return static_cast<T>(-1.471791292232726029859E2); }
67  static IPSDK_FORCEINLINE const T g_q3() { return static_cast<T>(1.395105614657485689735E2); }
68  static IPSDK_FORCEINLINE const T g_q4() { return static_cast<T>(-4.918853881490881290097E1); }
69  static IPSDK_FORCEINLINE const T g_r0() { return static_cast<T>(2.967721961301243206100E-3); }
70  static IPSDK_FORCEINLINE const T g_r1() { return static_cast<T>(-5.634242780008963776856E-1); }
71  static IPSDK_FORCEINLINE const T g_r2() { return static_cast<T>(6.968710824104713396794E0); }
72  static IPSDK_FORCEINLINE const T g_r3() { return static_cast<T>(-2.556901049652824852289E1); }
73  static IPSDK_FORCEINLINE const T g_r4() { return static_cast<T>(2.853665548261061424989E1); }
74  static IPSDK_FORCEINLINE const T g_s0() { return static_cast<T>(-2.194779531642920639778E1); }
75  static IPSDK_FORCEINLINE const T g_s1() { return static_cast<T>(1.470656354026814941758E2); }
76  static IPSDK_FORCEINLINE const T g_s2() { return static_cast<T>(-3.838770957603691357202E2); }
77  static IPSDK_FORCEINLINE const T g_s3() { return static_cast<T>(3.424398657913078477438E2); }
79 
    // Value-returning overload: forwards to the two-argument act() below and
    // returns the register it fills.
80  static IPSDK_FORCEINLINE
81  typename RegType<IS, T>::Type
82  act(const typename RegType<IS, T>::Type& in)
83  {
84  typename RegType<IS, T>::Type out;
85  act(in, out);
86  return out;
87  }
88 
    // Computes asin of `in` and writes the result to `out`.
    // There is no branching: both range approximations are evaluated on the
    // whole register and the per-element result is picked with IfElseReg.
    // As used here, IfElseReg::act(mask, a, b, dst) is taken to store `a`
    // where the mask is set and `b` elsewhere (consistent with the
    // "if (x < 0) z = -z" step at the end) — confirm against IfElseReg.h.
89  static IPSDK_FORCEINLINE
90  void
91  act(const typename RegType<IS, T>::Type& in,
92  typename RegType<IS, T>::Type& out)
93  {
94  typedef typename RegType<IS, T>::Type Reg;
95  typedef typename RegMaskType<IS, T>::Type MaskReg;
96 
    // Constant registers used throughout the computation.
    // NOTE(review): this view shows no assignment to cste0Reg or cste1Reg
    // (listing lines 98-99 are absent), yet both are read below; presumably
    // they are set to 0 and 1 there — confirm against the original header.
97  Reg cste0Reg, cste0P625Reg, cste1Reg, cstePiD4Reg, csteMinus1Reg;
100  simd::detail::AssignReg<IS, T>::act(csteMinus1Reg, -1);
101  simd::detail::AssignReg<IS, T>::act(cste0P625Reg, static_cast<T>(0.625));
102  simd::detail::AssignReg<IS, T>::act(cstePiD4Reg, static_cast<T>(M_PI_4));
103 
104  // check for input data range
    // test0P625Reg : aReg > 0.625, selects which approximation applies.
    // test0Reg     : in > cste0Reg, used at the end to restore the sign.
    // NOTE(review): aReg has no visible assignment (listing line 107 is
    // absent); presumably it receives the absolute value of `in` — confirm.
105  Reg aReg;
106  MaskReg test0P625Reg, test0Reg;
108  simd::detail::IsGreaterReg<IS, T>::act(aReg, cste0P625Reg, test0P625Reg);
109  simd::detail::IsGreaterReg<IS, T>::act(in, cste0Reg, test0Reg);
110 
111  // define numerator polynomial constants
    // csteRReg / cstePReg are scratch registers: each pair is loaded and then
    // merged into csteNumReg[i] according to the range mask, so the statement
    // order below matters (csteNumReg[i] pairs the value loaded into csteRReg
    // *before* the i-th IfElseReg call with g_p<i>).
    // NOTE(review): csteRReg is read by the first IfElseReg call without a
    // visible assignment (listing line 114 is absent); since there are only
    // five g_r* values for six numerator slots it is presumably a leading
    // padding coefficient — confirm.
112  Reg csteRReg, cstePReg;
113  Reg csteNumReg[6];
115  simd::detail::AssignReg<IS, T>::act(cstePReg, g_p0());
116  simd::detail::IfElseReg<IS, T>::act(test0P625Reg, csteRReg, cstePReg, csteNumReg[0]);
117  simd::detail::AssignReg<IS, T>::act(csteRReg, g_r0());
118  simd::detail::AssignReg<IS, T>::act(cstePReg, g_p1());
119  simd::detail::IfElseReg<IS, T>::act(test0P625Reg, csteRReg, cstePReg, csteNumReg[1]);
120  simd::detail::AssignReg<IS, T>::act(csteRReg, g_r1());
121  simd::detail::AssignReg<IS, T>::act(cstePReg, g_p2());
122  simd::detail::IfElseReg<IS, T>::act(test0P625Reg, csteRReg, cstePReg, csteNumReg[2]);
123  simd::detail::AssignReg<IS, T>::act(csteRReg, g_r2());
124  simd::detail::AssignReg<IS, T>::act(cstePReg, g_p3());
125  simd::detail::IfElseReg<IS, T>::act(test0P625Reg, csteRReg, cstePReg, csteNumReg[3]);
126  simd::detail::AssignReg<IS, T>::act(csteRReg, g_r3());
127  simd::detail::AssignReg<IS, T>::act(cstePReg, g_p4());
128  simd::detail::IfElseReg<IS, T>::act(test0P625Reg, csteRReg, cstePReg, csteNumReg[4]);
129  simd::detail::AssignReg<IS, T>::act(csteRReg, g_r4());
130  simd::detail::AssignReg<IS, T>::act(cstePReg, g_p5());
131  simd::detail::IfElseReg<IS, T>::act(test0P625Reg, csteRReg, cstePReg, csteNumReg[5]);
132 
133  // define denominator polynomial constants
    // Same scratch-register scheme as the numerator, with csteSReg / csteQReg
    // merged into csteDenomReg[i].
    // NOTE(review): csteSReg is read by the first IfElseReg call without a
    // visible assignment (listing line 136 is absent) — confirm its initial
    // value against the original header.
134  Reg csteSReg, csteQReg;
135  Reg csteDenomReg[5];
137  simd::detail::AssignReg<IS, T>::act(csteQReg, g_q0());
138  simd::detail::IfElseReg<IS, T>::act(test0P625Reg, csteSReg, csteQReg, csteDenomReg[0]);
139  simd::detail::AssignReg<IS, T>::act(csteSReg, g_s0());
140  simd::detail::AssignReg<IS, T>::act(csteQReg, g_q1());
141  simd::detail::IfElseReg<IS, T>::act(test0P625Reg, csteSReg, csteQReg, csteDenomReg[1]);
142  simd::detail::AssignReg<IS, T>::act(csteSReg, g_s1());
143  simd::detail::AssignReg<IS, T>::act(csteQReg, g_q2());
144  simd::detail::IfElseReg<IS, T>::act(test0P625Reg, csteSReg, csteQReg, csteDenomReg[2]);
145  simd::detail::AssignReg<IS, T>::act(csteSReg, g_s2());
146  simd::detail::AssignReg<IS, T>::act(csteQReg, g_q3());
147  simd::detail::IfElseReg<IS, T>::act(test0P625Reg, csteSReg, csteQReg, csteDenomReg[3]);
148  simd::detail::AssignReg<IS, T>::act(csteSReg, g_s3());
149  simd::detail::AssignReg<IS, T>::act(csteQReg, g_q4());
150  simd::detail::IfElseReg<IS, T>::act(test0P625Reg, csteSReg, csteQReg, csteDenomReg[4]);
151 
152  // compute polynomial approximation
    // zzReg is the polynomial argument: 1 - a where the range test is set,
    // a * a elsewhere. Numerator and denominator are then evaluated with
    // Horner's scheme, accumulating in place in pReg and denomReg.
153  Reg zz1Reg, zz2Reg, zzReg;
154  Reg z1Reg, z2Reg, zReg;
155  Reg pReg, denomReg, minusZReg;
156  simd::detail::SubReg<IS, T>::act(cste1Reg, aReg, zz1Reg); // zz1 = 1.0 - a
157  simd::detail::MulReg<IS, T>::act(aReg, aReg, zz2Reg); // zz2 = a * a
158  simd::detail::IfElseReg<IS, T>::act(test0P625Reg, zz1Reg, zz2Reg, zzReg);
159  simd::detail::MulReg<IS, T>::act(zzReg, csteNumReg[0], pReg); // p = zz * (N[5] + zz * (N[4] + zz * (N[3] + zz * (N[2] + zz * (N[1] + zz * N[0]))))) / (D[4] + zz * (D[3] + zz * (D[2] + zz * (D[1] + zz * (D[0] + zz)))))
160  simd::detail::AddReg<IS, T>::act(pReg, csteNumReg[1], pReg);
161  simd::detail::MulReg<IS, T>::act(pReg, zzReg, pReg);
162  simd::detail::AddReg<IS, T>::act(pReg, csteNumReg[2], pReg);
163  simd::detail::MulReg<IS, T>::act(pReg, zzReg, pReg);
164  simd::detail::AddReg<IS, T>::act(pReg, csteNumReg[3], pReg);
165  simd::detail::MulReg<IS, T>::act(pReg, zzReg, pReg);
166  simd::detail::AddReg<IS, T>::act(pReg, csteNumReg[4], pReg);
167  simd::detail::MulReg<IS, T>::act(pReg, zzReg, pReg);
168  simd::detail::AddReg<IS, T>::act(pReg, csteNumReg[5], pReg);
169  simd::detail::MulReg<IS, T>::act(pReg, zzReg, pReg);
170  simd::detail::AddReg<IS, T>::act(zzReg, csteDenomReg[0], denomReg);
171  simd::detail::MulReg<IS, T>::act(denomReg, zzReg, denomReg);
172  simd::detail::AddReg<IS, T>::act(denomReg, csteDenomReg[1], denomReg);
173  simd::detail::MulReg<IS, T>::act(denomReg, zzReg, denomReg);
174  simd::detail::AddReg<IS, T>::act(denomReg, csteDenomReg[2], denomReg);
175  simd::detail::MulReg<IS, T>::act(denomReg, zzReg, denomReg);
176  simd::detail::AddReg<IS, T>::act(denomReg, csteDenomReg[3], denomReg);
177  simd::detail::MulReg<IS, T>::act(denomReg, zzReg, denomReg);
178  simd::detail::AddReg<IS, T>::act(denomReg, csteDenomReg[4], denomReg);
179  simd::detail::DivReg<IS, T>::act(pReg, denomReg, pReg);
180 
    // Candidate result for the "range test true" case, built from zz1 = 1 - a.
    // zz1Reg is overwritten in place, so from here on it no longer holds 1 - a.
181  simd::detail::AddReg<IS, T>::act(zz1Reg, zz1Reg, zz1Reg); // zz = std::sqrt(zz+zz)
182  simd::detail::SqrtReg<IS, T>::act(zz1Reg, zz1Reg);
183  simd::detail::SubReg<IS, T>::act(cstePiD4Reg, zz1Reg, z1Reg); // z = M_PI_4 - zz
184  simd::detail::MulReg<IS, T>::act(zz1Reg, pReg, zz1Reg); // zz = zz * p
185  simd::detail::SubReg<IS, T>::act(z1Reg, zz1Reg, z1Reg); // z = z - zz
186  simd::detail::AddReg<IS, T>::act(z1Reg, cstePiD4Reg, z1Reg); // z = z + M_PI_4
187 
    // Candidate result for the "range test false" case.
188  simd::detail::MulReg<IS, T>::act(aReg, pReg, z2Reg); // z = a * p + a;
189  simd::detail::AddReg<IS, T>::act(z2Reg, aReg, z2Reg);
190 
191  // select output value
192  simd::detail::IfElseReg<IS, T>::act(test0P625Reg, z1Reg, z2Reg, zReg);
193 
194  // if (x < 0) z = -z;
    // Implemented as a blend: keep zReg where test0Reg (in > cste0Reg) is set,
    // otherwise take zReg multiplied by -1.
195  simd::detail::MulReg<IS, T>::act(zReg, csteMinus1Reg, minusZReg);
196  simd::detail::IfElseReg<IS, T>::act(test0Reg, zReg, minusZReg, out);
197  }
198 };
199 
202 
203 } // end of namespace detail
204 } // end of namespace simd
205 } // end of namespace ipsdk
206 
207 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_ASINREG_H__
template structure which is specialized to implement the computation of asin function on a scalar or ...
Definition: AsinReg.h:38
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Definition: DivReg.h:39
Definition: SubReg.h:39
Definition: SqrtReg.h:39
abs function; returns the absolute value of a pack
Definition: IsGreaterRegDecl.h:30
Definition: DataItemNodeHdrMacrosDetails.h:48
RegType class.
mul function; returns the product of 2 input pack operands
sqrt function; returns the square root of a pack
function assigning a given value of type T to a given Pack<instructionSet, T>
div function; returns the quotient of 2 input pack operands
Definition of import/export macro for library.
sub function; returns the result of an arithmetic subtraction on all the elements of 2 input pack op...
template structure which is specialized to implement the arithmetic addition on 2 scalars or 2 regist...
Definition: AddReg.h:37
Definition: IfElseReg.h:33
Definition: MulReg.h:39
Definition: RegMaskType.h:29
template structure which is specialized to implement the computation of abs function on a scalar or a...
Definition: AbsReg.h:46
add function; returns the result of an arithmetic add operation on all the elements of 2 input pack o...
Definition: RegType.h:29
Definition: AssignRegDecl.h:31