IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
TanReg.h
Go to the documentation of this file.
1 // TanReg.h:
3 // -------------------
4 //
15 
16 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_TANREG_H__
17 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_TANREG_H__
18 
22 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/AbsReg.h>
23 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/AddReg.h>
26 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/PolynomReg.h>
36 
37 #include <boost/mpl/not_equal_to.hpp>
38 
39 namespace ipsdk {
40 namespace simd {
41 namespace detail {
42 
45 
// Partial specialization of TanReg for ipReal32 registers (tan function).
// The enable_if / not_equal_to condition enables it for every instruction
// set IS except eInstructionSet::eIS_Standard.
// The active code path follows the cephes single precision tangent scheme
// (see the cephes references in the comments below).
// NOTE(review): this text is a line-numbered listing in which several
// original lines are not shown (51, 83, 86, 90, 111, 125-126, 144, ...);
// remarks about those lines are assumptions to confirm against the real header.
47 template <eInstructionSet::domain IS, eInstructionSet::domain ISFma>
48 struct TanReg<IS, ISFma, ipReal32, typename boost::enable_if<typename boost::mpl::not_equal_to<boost::mpl::int_<IS>, boost::mpl::int_<eInstructionSet::eIS_Standard> >::type>::type>
49 {
// Value-returning overload: forwards to the two-argument act() below and
// returns the local result register.
// NOTE(review): the return type (listing line 51) is not shown here;
// presumably typename RegType<IS, ipReal32>::Type - confirm.
50  static IPSDK_FORCEINLINE
52  act(const typename RegType<IS, ipReal32>::Type& in)
53  {
54  typename RegType<IS, ipReal32>::Type out;
55  act(in, out);
56  return out;
57  }
58 
// Output-parameter overload.
// in  : input register
// out : receives the computed result
59  static IPSDK_FORCEINLINE
60  void
61  act(const typename RegType<IS, ipReal32>::Type& in,
62  typename RegType<IS, ipReal32>::Type& out)
63  {
64  typedef typename RegType<IS, ipReal32>::Type RegReal32;
65  typedef typename RegMaskType<IS, ipReal32>::Type RegMaskReal32;
66 
// Disabled earlier variant kept for reference: tan computed as sin / cos.
67  /*RegReal32 regCos, regSin;
68  CosReg<IS, ISFma, ipReal32>::act(in, regCos);
69  SinReg<IS, ISFma, ipReal32>::act(in, regSin);
70  DivReg<IS, ipReal32>::act(regSin, regCos, out);*/
71 
// Active implementation (the #else branch further down is compiled out).
72 #if 1
73  typedef typename RegType<IS, ipInt32>::Type RegInt32;
74  typedef typename RegMaskType<IS, ipInt32>::Type RegMaskInt32;
// Work on the absolute value of the input; the sign is restored at the end.
75  RegReal32 absIn;
76  AbsReg<IS, ipReal32>::act(in, absIn);
// o = absIn * (4 / pi)
77  RegReal32 cephesFOPI;
78  AssignReg<IS, ipReal32>::act(cephesFOPI, 1.27323954473516f); // 4 / M_PI
79  RegReal32 o;
80  MulReg<IS, ipReal32>::act(absIn, cephesFOPI, o);
81 
// NOTE(review): listing line 83 is not shown; presumably it converts o to
// the integer register j - confirm. Likewise line 86 presumably assigns 1
// to the register named one, which is read by the AddReg call below.
82  RegInt32 j;
84  // cephes: j=(j+1) & (~1)
85  RegInt32 one, invOne;
87  AssignReg<IS, ipInt32>::act(invOne, ~1);
// Add one and clear bit 0, so j ends up even.
88  AddReg<IS, ipInt32>::act(j, one, j);
89  BitwiseAndReg<IS, ipInt32>::act(j, invOne, j);
// NOTE(review): listing line 90 is not shown; presumably it converts the
// adjusted j back into the real register o used below - confirm.
91 
92  // The magic pass: "Extended precision modular arithmetic"
93  // x = ((x - out * DP1) - out * DP2) - out * DP3;
// The three constants are stored negated, so the subtractions of the
// formula above are carried out with AddReg.
94  RegReal32 minusCephesDP1, minusCephesDP2, minusCephesDP3;
95  AssignReg<IS, ipReal32>::act(minusCephesDP1, -0.78515625f);
96  AssignReg<IS, ipReal32>::act(minusCephesDP2, -2.4187564849853515625e-4f);
97  AssignReg<IS, ipReal32>::act(minusCephesDP3, -3.77489497744594108e-8f);
98 
// z = absIn + o * (-DP1) + o * (-DP2) + o * (-DP3)
99  RegReal32 xmm1, xmm2, xmm3;
100  RegReal32 z;
101  MulReg<IS, ipReal32>::act(o, minusCephesDP1, xmm1);
102  MulReg<IS, ipReal32>::act(o, minusCephesDP2, xmm2);
103  MulReg<IS, ipReal32>::act(o, minusCephesDP3, xmm3);
104  AddReg<IS, ipReal32>::act(absIn, xmm1, z);
105  AddReg<IS, ipReal32>::act(z, xmm2, z);
106  AddReg<IS, ipReal32>::act(z, xmm3, z);
107 
// zz = z * z
108  RegReal32 zz;
109  MulReg<IS, ipReal32>::act(z, z, zz);
110 
// NOTE(review): the head of the following call (listing line 111) is not
// shown; presumably a PolynomReg evaluation in zz with the six
// coefficients below, writing its result to o - confirm, including the
// coefficient order expected by that helper.
112  zz,
113  3.33331568548E-1f,
114  1.33387994085E-1f,
115  5.34112807005E-2f,
116  2.44301354525E-2f,
117  3.11992232697E-3f,
118  9.38540185543E-3f,
119  o);
// o = o * zz * z + z
120  MulReg<IS, ipReal32>::act(o, zz, o);
121  MulReg<IS, ipReal32>::act(o, z, o);
122  AddReg<IS, ipReal32>::act(o, z, o);
123 
// Build a mask from (j & 2) != 0, move it into a real register and use it
// to choose between -1 / o and o.
// NOTE(review): listing lines 125-126 are not shown; presumably they
// assign 0 and 2 to the registers zero and two - confirm.
124  RegInt32 zero, two;
127  RegInt32 jAndTwo;
128  BitwiseAndReg<IS, ipInt32>::act(j, two, jAndTwo);
129  RegMaskInt32 jAndTwoNotNull;
130  IsNotEqualReg<IS, ipInt32>::act(jAndTwo, zero, jAndTwoNotNull);
131  RegInt32 regFromJAndTwoNotNull;
132  CastReg<IS, ipInt32, ipInt32>::act(jAndTwoNotNull, regFromJAndTwoNotNull);
133  RegReal32 jAndTwoNotNullReal32;
134  BitwiseCastReg<IS, ipInt32, ipReal32>::act(regFromJAndTwoNotNull, jAndTwoNotNullReal32);
135  RegReal32 minusOne;
136  AssignReg<IS, ipReal32>::act(minusOne, -1.0f);
137  RegReal32 minusOneDivByOut;
138  DivReg<IS, ipReal32>::act(minusOne, o, minusOneDivByOut);
// Presumably keeps minusOneDivByOut where the mask is set and o elsewhere
// (depends on the BitwiseSelectReg argument convention - confirm).
139  BitwiseSelectReg<IS, ipReal32>::act(jAndTwoNotNullReal32, minusOneDivByOut, o, o);
140 
// Sign restoration: the mask marks inputs that differ from their absolute
// value; out is chosen between minusOut and o according to that mask.
// NOTE(review): listing line 144 is not shown; presumably it fills
// minusOut with the negation of o - confirm.
141  RegReal32 minusOut;
142  RegMaskReal32 inIsNeg;
143  IsNotEqualReg<IS, ipReal32>::act(in, absIn, inIsNeg);
145  IfElseReg<IS, ipReal32>::act(inIsNeg, minusOut, o, out);
146 #else
// Alternative implementation, disabled by the #if 1 above. It reduces the
// argument with floor-based arithmetic and evaluates a ratio of two
// polynomials in zz (coefficients labelled p2..p0 and q4..q0).
// NOTE(review): listing lines 150, 168, 176, 179-180, 218, 228 and 240 are
// not shown, so this branch is incomplete as displayed here.
147  typedef typename ipsdk::simd::RegType<IS, ipsdk::ipInt32>::Type RegInt32;
148  RegReal32 x = in;
149  /* take the absolute value */
151 
152  /* scale by 4/Pi */
153  RegReal32 cephesFOPI;
154  AssignReg<IS, ipReal32>::act(cephesFOPI, 1.27323954473516f); // 4 / M_PI
155  MulReg<IS, ipReal32>::act(x, cephesFOPI, out);
156 
157  // y = floor(x/PIO4)
158  FloorReg<IS, ipReal32>::act(out, out);
159  RegReal32 oneDivBy8, eight;
160  AssignReg<IS, ipReal32>::act(oneDivBy8, 0.125f);
161  AssignReg<IS, ipReal32>::act(eight, 8.0f);
162  RegReal32 z;
163 
164  // z = ldexp(y, -3)
165  MulReg<IS, ipReal32>::act(out, oneDivBy8, z);
166 
167  // z = floor(z)
169 
170  // z = y - ldexp(z, 3)
171  MulReg<IS, ipReal32>::act(z, eight, z);
172  SubReg<IS, ipReal32>::act(out, z, z);
173 
174  // j = z
175  RegInt32 j;
177  // if(j & 1) { j+=1; y+=1.0}
178  RegInt32 zero, one;
181  RegInt32 jAndOne;
182  BitwiseAndReg<IS, ipInt32>::act(j, one, jAndOne);
183  RegInt32 jAndOneNotNull;
184  IsNotEqualReg<IS, ipInt32>::act(jAndOne, zero, jAndOneNotNull);
185  RegInt32 jPlusOne;
186  AddReg<IS, ipInt32>::act(j, one, jPlusOne);
187  BitwiseSelectReg<IS, ipInt32>::act(jAndOneNotNull, jPlusOne, j, j);
188  RegReal32 jAndOneNotNullReal32;
189  BitwiseCastReg<IS, ipInt32, ipReal32>::act(jAndOneNotNull, jAndOneNotNullReal32);
190  RegReal32 oneReal32;
191  AssignReg<IS, ipReal32>::act(oneReal32, 1.0f);
192  RegReal32 outPlusOne;
193  AddReg<IS, ipReal32>::act(out, oneReal32, outPlusOne);
194  BitwiseSelectReg<IS, ipReal32>::act(jAndOneNotNullReal32, outPlusOne, out, out);
195 
196  /* The magic pass: "Extended precision modular arithmetic"
197  x = ((x - out * DP1) - out * DP2) - out * DP3; */
198  RegReal32 minusCephesDP1, minusCephesDP2, minusCephesDP3;
199  AssignReg<IS, ipReal32>::act(minusCephesDP1, -0.78515625f);
200  AssignReg<IS, ipReal32>::act(minusCephesDP2, -2.4187564849853515625e-4f);
201  AssignReg<IS, ipReal32>::act(minusCephesDP3, -3.77489497744594108e-8f);
202 
203  /* The magic pass: "Extended precision modular arithmetic"
204  x = ((x - out * DP1) - out * DP2) - out * DP3; */
205  RegReal32 xmm1, xmm2, xmm3;
206  MulReg<IS, ipReal32>::act(out, minusCephesDP1, xmm1);
207  MulReg<IS, ipReal32>::act(out, minusCephesDP2, xmm2);
208  MulReg<IS, ipReal32>::act(out, minusCephesDP3, xmm3);
209  AddReg<IS, ipReal32>::act(x, xmm1, z);
210  AddReg<IS, ipReal32>::act(z, xmm2, z);
211  AddReg<IS, ipReal32>::act(z, xmm3, z);
212 
213  // zz = z*z
214  RegReal32 zz;
215  MulReg<IS, ipReal32>::act(z, z, zz);
216 
217  RegReal32 numPoly;
219  zz,
220  -1.79565251976484877988E7f, // p2
221  1.15351664838587416140E6f, // p1
222  -1.30936939181383777646E4f, // p0
223  numPoly);
224  MulReg<IS, ipReal32>::act(numPoly, zz, numPoly);
225  MulReg<IS, ipReal32>::act(numPoly, z, numPoly);
226 
227  RegReal32 denPoly;
229  zz,
230  -5.38695755929454629881E7f, // q4
231  2.50083801823357915839E7f, // q3
232  -1.32089234440210967447E6f, // q2
233  1.36812963470692954678E4f, // q1
234  1.00000000000000000000E0f, // q0
235  denPoly);
236 
237  DivReg<IS, ipReal32>::act(numPoly, denPoly, out);
238  AddReg<IS, ipReal32>::act(out, z, out);
239  RegInt32 two;
241  RegInt32 jAndTwo;
242  BitwiseAndReg<IS, ipInt32>::act(j, two, jAndTwo);
243  RegInt32 jAndTwoNotNull;
244  IsNotEqualReg<IS, ipInt32>::act(jAndTwo, zero, jAndTwoNotNull);
245  RegReal32 jAndTwoNotNullReal32;
246  BitwiseCastReg<IS, ipInt32, ipReal32>::act(jAndTwoNotNull, jAndTwoNotNullReal32);
247  RegReal32 minusOne;
248  AssignReg<IS, ipReal32>::act(minusOne, -1.0f);
249  RegReal32 minusOneDivByOut;
250  DivReg<IS, ipReal32>::act(minusOne, out, minusOneDivByOut);
251  BitwiseSelectReg<IS, ipReal32>::act(jAndTwoNotNullReal32, minusOneDivByOut, out, out);
252 
253  RegReal32 inIsNeg, minusOut;
254  IsNotEqualReg<IS, ipReal32>::act(in, x, inIsNeg);
255  UnaryMinusReg<IS, ipReal32>::act(out, minusOut);
256  BitwiseSelectReg<IS, ipReal32>::act(inIsNeg, minusOut, out, out);
257 #endif
258  }
259 };
260 
263 
264 } // end of namespace detail
265 } // end of namespace simd
266 } // end of namespace ipsdk
267 
268 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_TANREG_H__
Template structure which is specialized to implement the computation of a polynomial of degree 8 applied...
Definition: PolynomReg.h:43
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Definition: DivReg.h:39
Definition: SubReg.h:39
template structure which is specialized to implement the computation of tan function on a scalar or a...
Definition: TanReg.h:39
template structure which is specialized to implement the computation of value rounded to closest even...
Definition: FloorReg.h:36
Definition: CastReg.h:30
Definition: DataItemNodeHdrMacrosDetails.h:48
RegType class.
Definition: BitwiseSelectReg.h:30
RegMaskType class.
Definition of import/export macro for library.
template structure which is specialized to implement the arithmetic addition on 2 scalars or 2 regist...
Definition: AddReg.h:37
Definition: IfElseReg.h:33
Definition: MulReg.h:39
Definition: IsNotEqualRegDecl.h:30
Definition: UnaryMinusReg.h:40
Definition: RegMaskType.h:29
Definition: BitwiseCastReg.h:29
template structure which is specialized to implement the computation of abs function on a scalar or a...
Definition: AbsReg.h:46
Definition: RegType.h:29
Definition: BitwiseAndReg.h:30
Definition: AssignRegDecl.h:31
float ipReal32
Base types definition.
Definition: BaseTypes.h:56