IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
PowNoCheckReg.h
Go to the documentation of this file.
1 // PowNoCheckReg.h:
3 // -------------------
4 //
13 
14 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_POWNOCHECKREG_H__
15 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_POWNOCHECKREG_H__
16 
19 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/AbsReg.h>
20 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/AddReg.h>
21 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/ConditionalAddReg.h>
22 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/FloorReg.h>
23 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/FrexpReg.h>
25 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/FmaddReg.h>
26 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/LdexpReg.h>
28 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/PolynomReg.h>
29 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/PowNoCheckReg.h>
47 
49 
50 namespace ipsdk {
51 namespace simd {
52 namespace detail {
53 
56 
// Partial specialization of PowNoCheckReg for ipReal32 registers: computes
// pow(base, exp) lane-wise on SIMD registers of the instruction set IS.
// ISFma is the instruction set handed to some of the FmaddReg calls below.
// The tables and the pseudo-code comments appear to follow the Cephes
// single-precision powf() scheme -- TODO confirm against the original port.
// NOTE(review): this listing carries its own line numbers and some numbered
// lines are absent, so several statements that the comments refer to are not
// visible here. Such places are marked "(not visible in this listing)".
58 template <eInstructionSet::domain IS, eInstructionSet::domain ISFma>
59 struct PowNoCheckReg<IS, ISFma, ipReal32>
60 {
61 
62 
    // Convenience overload: forwards to the three-argument act() below and
    // returns the computed register by value.
    // NOTE(review): the return-type line is not visible in this listing;
    // presumably typename RegType<IS, ipReal32>::Type -- confirm.
63  static IPSDK_FORCEINLINE
65  act(const typename RegType<IS, ipReal32>::Type& base,
66  const typename RegType<IS, ipReal32>::Type& exp)
67  {
68  typename RegType<IS, ipReal32>::Type out;
69  act(base, exp, out);
70  return out;
71  }
72 
    // Helper used by act() for its "reduc(...)" steps.
    // Visible behaviour: out = in * 16, then out = (1/16) * out.
    // NOTE(review): one statement between the two multiplications is not
    // visible in this listing; presumably it rounds/floors `out` so that the
    // result is `in` reduced to a multiple of 1/16 -- confirm.
73  static IPSDK_FORCEINLINE
74  void
75  reduc(
76  const typename RegType<IS, ipReal32>::Type& in,
77  typename RegType<IS, ipReal32>::Type& out)
78  {
79  typedef typename RegType<IS, ipReal32>::Type Reg;
80  Reg oneOverSixteen, sixteen;
81  AssignReg<IS, ipReal32>::act(oneOverSixteen, 0.0625f);
82  AssignReg<IS, ipReal32>::act(sixteen, 16.0f);
83  MulReg<IS, ipReal32>::act(in, sixteen, out);
85  MulReg<IS, ipReal32>::act(oneOverSixteen, out, out);
86  }
87 
    // Computes out = pow(base, exp) for every lane.
    //   base : input register holding the bases
    //   exp  : input register holding the exponents
    //   out  : output register receiving the results
    // Outline, as far as the visible statements and pseudo-code show:
    //   1. split |base| with FrexpReg into a mantissa x and an exponent e;
    //   2. locate x in the table A[] (index i) and form a small residual;
    //   3. approximate the base-2 logarithm of the residual with a polynomial;
    //   4. multiply by exp using values reduced to multiples of 1/16 (reduc);
    //   5. rebuild the result from A[], a polynomial in the fractional part
    //      and a final ldexp;
    //   6. patch the result for negative and zero bases.
    // The overflow/underflow clamps are still a TODO (see below), which is
    // consistent with the "NoCheck" in the structure name.
88  static IPSDK_FORCEINLINE
89  void
90  act(const typename RegType<IS, ipReal32>::Type& base,
91  const typename RegType<IS, ipReal32>::Type& exp,
92  typename RegType<IS, ipReal32>::Type& out)
93  {
94  // 2^(-i/16) for i = 0..16. The decimal values are rounded to 24-bit precision
95  static ipReal32 A[] = {
96  1.00000000000000000000E0f,
97  9.57603275775909423828125E-1f,
98  9.17004048824310302734375E-1f,
99  8.78126084804534912109375E-1f,
100  8.40896427631378173828125E-1f,
101  8.05245161056518554687500E-1f,
102  7.71105408668518066406250E-1f,
103  7.38413095474243164062500E-1f,
104  7.07106769084930419921875E-1f,
105  6.77127778530120849609375E-1f,
106  6.48419797420501708984375E-1f,
107  6.20928883552551269531250E-1f,
108  5.94603538513183593750000E-1f,
109  5.69394290447235107421875E-1f,
110  5.45253872871398925781250E-1f,
111  5.22136867046356201171875E-1f,
112  5.00000000000000000000E-1f
113  };
114 
115  // continuation (low-order correction), for even i only: 2^(-i/16) = A[i] + B[i/2]
116  static ipReal32 B[] = {
117  0.00000000000000000000E0f,
118  -5.61963907099083340520586E-9f,
119  -1.23776636307969995237668E-8f,
120  4.03545234539989593104537E-9f,
121  1.21016171044789693621048E-8f,
122  -2.00949968760174979411038E-8f,
123  1.89881769396087499852802E-8f,
124  -6.53877009617774467211965E-9f,
125  0.00000000000000000000E0f
126  };
127 
128  // 1 / A[i], i.e. 2^(i/16). The decimal values are full precision
129  static ipReal32 Ainv[] = {
130  1.00000000000000000000000E0f,
131  1.04427378242741384032197E0f,
132  1.09050773266525765920701E0f,
133  1.13878863475669165370383E0f,
134  1.18920711500272106671750E0f,
135  1.24185781207348404859368E0f,
136  1.29683955465100966593375E0f,
137  1.35425554693689272829801E0f,
138  1.41421356237309504880169E0f,
139  1.47682614593949931138691E0f,
140  1.54221082540794082361229E0f,
141  1.61049033194925430817952E0f,
142  1.68179283050742908606225E0f,
143  1.75625216037329948311216E0f,
144  1.83400808640934246348708E0f,
145  1.91520656139714729387261E0f,
146  2.00000000000000000000000E0f
147  };
148 
    // Register and mask aliases for the float lanes and the int32 lanes.
149  typedef typename RegType<IS, ipReal32>::Type Reg;
150  typedef typename RegMaskType<IS, ipReal32>::Type RegMask;
151  typedef typename RegType<IS, ipInt32>::Type RegInt32;
152  typedef typename RegMaskType<IS, ipInt32>::Type RegMaskInt32;
153 
    // Largest finite ipReal32 value. Not referenced by any visible statement;
    // it is only mentioned by the overflow TODO further down.
154  const ipReal32 MAXNUMF = 3.4028234663852885981170418348451692544e38f;
155 
    // The core algorithm works on |base|; the sign is handled at the end.
156  Reg baseAbs;
157  AbsReg<IS, ipReal32>::act(base, baseAbs);
158 
    // floor(exp) is used at the end to detect non-integral exponents.
    // NOTE(review): expFlooredAbs is not read by any visible statement.
159  Reg expFloored;
160  FloorReg<IS, ipReal32>::act(exp, expFloored);
161  Reg expFlooredAbs;
162  AbsReg<IS, ipReal32>::act(expFloored, expFlooredAbs);
163 
    // Decompose |base| into mantissa x and integer exponent e.
164  Reg x;
165  RegInt32 e;
166  FrexpReg<IS, ipReal32>::act(baseAbs, x, e);
167  RegInt32 i;
168 
    // Branch-free search of the table index i such that A[i] brackets x.
    // Each "if" of the scalar pseudo-code becomes a mask plus a conditional op.
169  // i = 1
170  // if(x<=A[9]) i=9
    // (the mask conversion and the assignment of i are not visible in this listing)
171  RegMask xLEA9;
172  IsLessEqualReg<IS, ipReal32>::act(x, A[9], xLEA9);
173  RegMaskInt32 xLEA9Int32;
176 
177  // if(x<=A[i+4]) i+=4
    // (the initialisation of `four` is not visible in this listing)
178  RegInt32 four;
180  RegInt32 ip4;
181  AddReg<IS, ipInt32>::act(i, four, ip4);
182  Reg aip4;
183  GatherReg<IS, ipReal32>::act(A, ip4, aip4);
184  RegMask xLEAip4;
185  IsLessEqualReg<IS, ipReal32>::act(x, aip4, xLEAip4);
186  RegMaskInt32 xLEAip4Int32;
187  BitwiseCastReg<IS, ipReal32, ipInt32>::act(xLEAip4, xLEAip4Int32);
188  ConditionalAddReg<IS, ipInt32>::act(xLEAip4Int32, i, four, i);
189 
190  // if(x<=A[i+2]) i+=2
    // (the initialisation of `two` is not visible in this listing)
191  RegInt32 two;
193  RegInt32 ip2;
194  AddReg<IS, ipInt32>::act(i, two, ip2);
195  Reg aip2;
196  GatherReg<IS, ipReal32>::act(A, ip2, aip2);
197  RegMask xLEAip2;
198  IsLessEqualReg<IS, ipReal32>::act(x, aip2, xLEAip2);
199  RegMaskInt32 xLEAip2Int32;
200  BitwiseCastReg<IS, ipReal32, ipInt32>::act(xLEAip2, xLEAip2Int32);
201  ConditionalAddReg<IS, ipInt32>::act(xLEAip2Int32, i, two, i);
202 
203  // if(x>=A[1]) i=-1
    // (the comparison, cast and assignment are not visible in this listing)
204  RegMask xGEA1;
206  RegMaskInt32 xGEA1Int32;
209 
210  // i+=1
    // (not visible in this listing)
212 
213  // x -= A[i];
    // (the gather that loads A[i] into `ai` is not visible in this listing)
214  Reg ai;
216  SubReg<IS, ipReal32>::act(x, ai, x);
217 
218  // x-= B[i>>1];
    // (the shift that fills `isr1` is not visible in this listing)
219  RegInt32 isr1;
221  Reg bisr1;
222  GatherReg<IS, ipReal32>::act(B, isr1, bisr1);
223  SubReg<IS, ipReal32>::act(x, bisr1, x);
224 
225  // x*=Ainv[i]
    // After this step x holds (x - A[i] - B[i>>1]) / A[i], per the three
    // pseudo-code lines above.
226  Reg ainvi;
227  GatherReg<IS, ipReal32>::act(Ainv, i, ainvi);
228  MulReg<IS, ipReal32>::act(x, ainvi, x);
229 
230  // z = x*x;
231  Reg z;
232  MulReg<IS, ipReal32>::act(x, x, z);
233 
    // Polynomial in x with the four coefficients below, result in w, then
    // w = w * x * z.
    // NOTE(review): the head of this call is not visible in this listing;
    // presumably a PolynomReg<...>::act( invocation -- confirm, including the
    // order in which the coefficients are applied.
234  Reg w;
236  x,
237  0.3333331095506474f,
238  -0.2500006373383951f,
239  0.2003770364206271f,
240  -0.1663883081054895f,
241  w);
242  MulReg<IS, ipReal32>::act(w, x, w);
243  MulReg<IS, ipReal32>::act(w, z, w);
244 
245  // w -= 0.5 * z;
    // (the computation of `halfZ` is not visible in this listing)
246  Reg halfZ;
248  SubReg<IS, ipReal32>::act(w, halfZ, w);
249 
    // FmaddReg::act(a, b, c, out) appears to compute out = a * b + c, judging
    // by the pseudo-code next to each call -- confirm against FmaddReg.h.
    // NOTE(review): these two calls use ISFma while the FmaddReg calls further
    // down use IS; confirm whether that difference is intentional.
250  // w = w + LOG2EA * w;
251  Reg log2ea;
252  AssignReg<IS, ipReal32>::act(log2ea, 0.44269504088896340736F);
253  FmaddReg<ISFma, ipReal32>::act(log2ea, w, w, w);
254 
255  // z = w + LOG2EA * x;
256  FmaddReg<ISFma, ipReal32>::act(log2ea, x, w, z);
257  // z = z + x;
258  AddReg<IS, ipReal32>::act(z, x, z);
259 
260  // w = -i;
261  // w *= 0.0625;
    // (both statements are not visible in this listing)
264 
265  // w += e;
    // (the int32 -> float conversion of e into `eReal32` is not visible in this listing)
266  Reg eReal32;
268  AddReg<IS, ipReal32>::act(w, eReal32, w);
269 
    // Multiply the logarithm (split as w + z) by the exponent y = exp, keeping
    // the pieces reduced to multiples of 1/16.
    // In the pseudo-code below, F, G and H are successive values held in
    // register W; Fa/Ga live in Wa, Fb/Ha/Hb live in Wb and Gb lives in u.
270  Reg ya, yb;
271 
272  // ya = reduc(y);
273  reduc(exp, ya);
274 
275  // yb = y - ya;
276  SubReg<IS, ipReal32>::act(exp, ya, yb);
277 
278  // W = z * y + w * yb;
279  Reg zMulY;
280  MulReg<IS, ipReal32>::act(z, exp, zMulY);
281  Reg W;
282  FmaddReg<IS, ipReal32>::act(w, yb, zMulY, W);
283 
284 
285  // Wa = reduc(W);      (Fa = reduc(F))
286  Reg Wa;
287  reduc(W, Wa);
288 
289  // Wb = W - Wa;        (Fb = F - Fa)
290  Reg Wb;
291  SubReg<IS, ipReal32>::act(W, Wa, Wb);
292 
293  // G = Fa + w * ya;    (G overwrites W)
294  FmaddReg<IS, ipReal32>::act(w, ya, Wa, W);
295 
296  // Ga = reduc(G);      (Ga overwrites Wa)
297  reduc(W, Wa);
298 
299  // Gb = G-Ga           (stored in u)
300  Reg u;
301  SubReg<IS, ipReal32>::act(W, Wa, u);
302 
303  // H = Fb + Gb         (H overwrites W)
304  AddReg<IS, ipReal32>::act(Wb, u, W);
305 
306  // Ha = reduc(H);      (Ha overwrites Wb)
307  reduc(W, Wb);
308 
309  // w = 16*(Ga+Ha)
310  Reg sixteen;
311  AssignReg<IS, ipReal32>::act(sixteen, 16.0f);
312  Reg gaPlusHa;
313  AddReg<IS, ipReal32>::act(Wa, Wb, gaPlusHa);
314  MulReg<IS, ipReal32>::act(sixteen, gaPlusHa, w);
315 
    // Disabled draft of the range checks described by the TODO below.
316  /*RegMask wGreaterThanMExp;
317  IsGreaterReg<IS, ipReal32>::act(w, AssignReg<IS, ipReal32>::act(2048.0f), wGreaterThanMExp);
318  RegMask wLowerThanMNExp;
319  IsLessReg<IS, ipReal32>::act(w, AssignReg<IS, ipReal32>::act(-2400.0f), wLowerThanMNExp);*/
320 
321  // TODO:
322  // if(w>MEXP) return MAXNUMF;
323  // if(w<MNEXP) return 0.0;
324 
325  // e = w;
    // (the float -> int32 conversion is not visible in this listing)
327 
328  // Hb = H-Ha;          (Hb overwrites Wb)
329  SubReg<IS, ipReal32>::act(W, Wb, Wb);
330 
331  // if(Hb > 0.0) { e+=1; Hb-=0.0625; }
    // (the cast of HbPos into `HbPosInt32` is not visible in this listing)
332  Reg zero;
333  AssignReg<IS, ipReal32>::act(zero, 0.0f);
334  RegMask HbPos;
335  IsGreaterReg<IS, ipReal32>::act(Wb, zero, HbPos);
336  RegMaskInt32 HbPosInt32;
338  ConditionalAddReg<IS, ipInt32>::act(HbPosInt32, e, 1, e);
339  ConditionalAddReg<IS, ipReal32>::act(HbPos, Wb, -0.0625f, Wb);
340 
    // Polynomial in Wb with the four coefficients below, result in z, then
    // z = z * Wb.
    // NOTE(review): the head of this call is not visible in this listing;
    // presumably a PolynomReg<...>::act( invocation approximating 2^Wb - 1
    // (the first coefficient is close to ln 2) -- confirm.
342  Wb,
343  6.931471791490764E-001f,
344  2.402262883964191E-001f,
345  5.549356188719141E-002f,
346  9.416993633606397E-003f,
347  z);
348  MulReg<IS, ipReal32>::act(z, Wb, z);
349 
350  // if (e<0) i=-(-e>>4); else i= (e>>4)+1;
    // Both branches are evaluated and IfElseReg selects per lane using eNeg.
    // (the statements filling esr4, mesr4 (= -e), esr4Plus1 and eNeg are not
    // visible in this listing)
351  RegInt32 esr4;
353  RegInt32 mesr4;
355  ShiftRightReg<IS, ipInt32>::act(mesr4, 4, mesr4);
356  UnaryMinusReg<IS, ipInt32>::act(mesr4, mesr4);
357  RegInt32 esr4Plus1;
359  RegMaskInt32 eNeg;
361  IfElseReg<IS, ipInt32>::act(eNeg, mesr4, esr4Plus1, i);
    // Disabled alternative based on an integer division by 16.
362  /*RegInt32 esr4;
363  DivReg<IS, ipInt32>::act(e, AssignReg<IS, ipInt32>::act(16), esr4);
364  RegInt32 ePos;
365  IsGreaterEqualReg<IS, ipInt32>::act(e, AssignReg<IS, ipInt32>::act(0), ePos);
366  ConditionalAddReg<IS, ipInt32>::act(ePos, esr4, AssignReg<IS, ipInt32>::act(1), i);*/
367 
368  // e = (i << 4) - e;
369  RegInt32 isl4;
370  ShiftLeftReg<IS, ipInt32>::act(i, 4, isl4);
371  SubReg<IS, ipInt32>::act(isl4, e, e);
372 
373  // w = A[e]
    // (the gather is not visible in this listing)
375 
376  // z = w + w*z
377  FmaddReg<IS, ipReal32>::act(w, z, w, z);
378 
379  // z = std::ldexp(z, i)
    // (the LdexpReg call is not visible in this listing)
381 
382  // pow(x, y) with x negative and y integral returns -pow(-x, y)
383  // pow(x, y) with x negative and y non integral returns nan
    // NOTE(review): the statement that fills `minusOut` is not visible in this
    // listing. As far as the visible statements show, the parity of an
    // integral exponent is not taken into account, whereas std::pow returns
    // +pow(-x, y) for even y -- confirm the intended behaviour.
384  RegMask baseIsNegative;
385  IsLessReg<IS, ipReal32>::act(base, zero, baseIsNegative);
386  RegMask expIsNonIntegral;
387  IsNotEqualReg<IS, ipReal32>::act(exp, expFloored, expIsNonIntegral);
388  Reg minusOut;
390  Reg nan;
391  AssignReg<IS, ipReal32>::act(nan, std::numeric_limits<ipReal32>::quiet_NaN());
392  Reg minusOutIfExpIsIntElseNan;
393  IfElseReg<IS, ipReal32>::act(expIsNonIntegral, nan, minusOut, minusOutIfExpIsIntElseNan);
394  IfElseReg<IS, ipReal32>::act(baseIsNegative, minusOutIfExpIsIntElseNan, z, z);
395 
396  // pow(0.0, 0.0) equals to 1.0
397  // pow(0.0, y) (with y!=0) equals to 0.0
    // This final selection is the one that writes `out`: lanes whose base is
    // zero take the special value, all other lanes take z.
398  RegMask baseIsNull;
399  IsEqualReg<IS, ipReal32>::act(base, zero, baseIsNull);
400  RegMask expIsNull;
401  IsEqualReg<IS, ipReal32>::act(exp, zero, expIsNull);
402  Reg oneIfExpIsNullElseZero;
403  IfElse0Reg<IS, ipReal32>::act(expIsNull, AssignReg<IS, ipReal32>::act(1.0f), oneIfExpIsNullElseZero);
404  IfElseReg<IS, ipReal32>::act(baseIsNull, oneIfExpIsNullElseZero, z, out);
405  }
406 };
407 
410 
411 } // end of namespace detail
412 } // end of namespace simd
413 } // end of namespace ipsdk
414 
415 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_POWNOCHECKREG_H__
template structure which is specialized to implement the computation of a polynom of degree 8 applied...
Definition: PolynomReg.h:43
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Definition: FmaddReg.h:37
Definition: SubReg.h:39
Definition: IsEqualRegDecl.h:35
IsEqualReg<eInstructionSet::domain instructionSet, typename T, typename Enable=void> structure...
template structure which is specialized to implement the decomposition of input floating value f into...
Definition: FrexpReg.h:37
Definition: IsGreaterRegDecl.h:30
template structure which is specialized to implement the computation of pow function on scalars or re...
Definition: PowNoCheckReg.h:38
template structure which is specialized to implement the computation of value rounded to closest even...
Definition: FloorReg.h:36
Definition: CastReg.h:30
Definition: IsGreaterEqualRegDecl.h:30
RegType class.
Definition: IsLessEqualRegDecl.h:30
Definition: ShiftLeftReg.h:30
Definition: IfElse0Reg.h:33
Definition: ConditionalAddReg.h:36
Definition of import/export macro for library.
template structure which is specialized to implement the arithmetic addition on 2 scalars or 2 regist...
Definition: AddReg.h:37
Definition: IfElseReg.h:33
Definition: MulReg.h:39
Definition: IsNotEqualRegDecl.h:30
Definition: UnaryMinusReg.h:40
Definition: RegMaskType.h:29
Definition: BitwiseCastReg.h:29
template structure which is specialized to implement the computation of abs function on a scalar or a...
Definition: AbsReg.h:46
Definition: ShiftRightReg.h:30
template structure which is specialized to implement the computation of ldexp function on scalars or ...
Definition: LdexpReg.h:38
Definition: IsLessRegDecl.h:30
Definition: RegType.h:29
Definition: GatherReg.h:30
Definition: AssignRegDecl.h:31
float ipReal32
Base types definition.
Definition: BaseTypes.h:56