IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
ExpReg.h
Go to the documentation of this file.
1 // ExpReg.h:
3 // -------------------
4 //
15 
16 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_EXPREG_H__
17 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_EXPREG_H__
18 
22 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/AddReg.h>
23 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/FloorReg.h>
28 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/PolynomReg.h>
37 
38 #include <limits>
39 
40 namespace ipsdk {
41 namespace simd {
42 namespace detail {
43 
46 
// Specialization of ExpReg for ipReal32 registers of instruction set IS:
// computes the exponential of the values held in a register, following the
// range reduction exp(x) = exp(g + n*log(2)) stated further down.
//
// Template parameters:
//  - IS    : instruction set used to select the register types and operations.
//  - ISFma : second instruction-set parameter; it is not referenced by any
//            line visible in this listing.
//            NOTE(review): presumably consumed by the polynomial evaluation
//            on the missing source line 103 - confirm.
//
// NOTE(review): this listing skips source lines 52, 72, 75, 92, 103, 118 and
// 124 (see the gaps in the embedded numbering). Comments about what those
// lines do are assumptions to be confirmed against the real header.
48 template <eInstructionSet::domain IS, eInstructionSet::domain ISFma>
49 struct ExpReg<IS, ISFma, ipReal32>
50 {
 // Value-returning overload: forwards to the two-argument act() below and
 // returns the filled register.
 // NOTE(review): the return type (source line 52) is missing from this
 // listing; the returned object is a RegType<IS, ipReal32>::Type.
51  static IPSDK_FORCEINLINE
53  act(const typename RegType<IS, ipReal32>::Type& in)
54  {
55  typename RegType<IS, ipReal32>::Type out;
56  act(in, out);
57  return out;
58  }
59 
 // Output-parameter overload.
 //  in  : input register (read only).
 //  out : receives the result; written once, by the final IfElseReg call.
60  static IPSDK_FORCEINLINE
61  void
62  act(const typename RegType<IS, ipReal32>::Type& in,
63  typename RegType<IS, ipReal32>::Type& out)
64  {
65  typedef typename RegType<IS, ipReal32>::Type RegReal32;
66  typedef typename RegMaskType<IS, ipReal32>::Type RegMaskReal32;
67  typedef typename RegType<IS, ipInt32>::Type RegInt32;
68 
 // Work on a copy: 'in' itself is needed again for the special-value tests
 // at the end of the function.
69  RegReal32 x = in;
70 
71  RegReal32 tmp, fx;
73 
 // NOTE(review): 'one' is read further down (tmp + one) but no assignment is
 // visible; presumably the missing source line 75 sets it to 1 - confirm.
74  RegReal32 one;
76 
 // Clamp x to [expLo, expHi] = [-88.3762626647949, 88.3762626647949].
77  RegReal32 expHi, expLo;
78  AssignReg<IS, ipReal32>::act(expHi, 88.3762626647949f);
79  AssignReg<IS, ipReal32>::act(expLo, -88.3762626647949f);
80 
81  MinReg<IS, ipReal32>::act(x, expHi, x);
82  MaxReg<IS, ipReal32>::act(x, expLo, x);
83 
84  /* express exp(x) as exp(g + n*log(2)) */
 // log2EF is log2(e); fx = x * log2(e) + 0.5.
85  RegReal32 log2EF, halfOne;
86  AssignReg<IS, ipReal32>::act(log2EF, 1.44269504088896341f);
87  AssignReg<IS, ipReal32>::act(halfOne, 0.5f);
88 
89  MulReg<IS, ipReal32>::act(x, log2EF, fx);
90  AddReg<IS, ipReal32>::act(fx, halfOne, fx);
91 
 // NOTE(review): source line 92 is missing here; presumably it applies
 // FloorReg to fx so that fx holds the integer n - confirm.
93 
 // Remove fx * log(2) from x in two steps: cephesExpC1 + cephesExpC2 sums to
 // approximately log(2) (0.693359375 - 2.12194440e-4), so after both
 // subtractions x holds the reduced argument g.
94  RegReal32 cephesExpC1, cephesExpC2, z;
95  AssignReg<IS, ipReal32>::act(cephesExpC1, 0.693359375f);
96  AssignReg<IS, ipReal32>::act(cephesExpC2, -2.12194440e-4f);
97  MulReg<IS, ipReal32>::act(fx, cephesExpC1, tmp);
98  MulReg<IS, ipReal32>::act(fx, cephesExpC2, z);
99  SubReg<IS, ipReal32>::act(x, tmp, x);
100  SubReg<IS, ipReal32>::act(x, z, x);
101 
 // z = x^2
102  MulReg<IS, ipReal32>::act(x, x, z);
 // NOTE(review): the opening of this call (source line 103) is missing; the
 // remaining arguments are x, six coefficients and tmp, so this is presumably
 // a PolynomReg evaluation in x whose result is stored in tmp - confirm the
 // exact template arguments and the coefficient order.
104  x,
105  +5.0000001201E-1f, // cephes_exp_p5
106  +1.6666665459E-1f, // cephes_exp_p4
107  +4.1665795894E-2f, // cephes_exp_p3
108  +8.3334519073E-3f, // cephes_exp_p2
109  +1.3981999507E-3f, // cephes_exp_p1
110  +1.9875691500E-4f, // cephes_exp_p0
111  tmp);
 // tmp = tmp * z + x + one
112  MulReg<IS, ipReal32>::act(tmp, z, tmp);
113  AddReg<IS, ipReal32>::act(tmp, x, tmp);
114  AddReg<IS, ipReal32>::act(tmp, one, tmp);
115 
116  /* build 2^n */
 // NOTE(review): emm0 is used below without a visible assignment; presumably
 // the missing source line 118 converts fx to 32-bit integers - confirm.
117  RegInt32 emm0;
119  RegInt32 Reg127;
 // (emm0 + 127) << 23: with ipReal32 being float, 127 is the IEEE-754
 // single-precision exponent bias and 23 the width of its mantissa field, so
 // the integer ends up in the exponent position.
120  AssignReg<IS, ipInt32>::act(Reg127, 127);
121  AddReg<IS, ipInt32>::act(emm0, Reg127, emm0);
122  ShiftLeftReg<IS, ipInt32>::act(emm0, 23, emm0);
 // NOTE(review): pow2n is read below without a visible assignment; presumably
 // the missing source line 124 reinterprets the bits of emm0 as ipReal32
 // - confirm.
123  RegReal32 pow2n;
125 
 // Scale the polynomial result by 2^n.
126  MulReg<IS, ipReal32>::act(tmp, pow2n, tmp);
127 
128  // test particular values:
129  // - if in equals +infinity, out must be +infinity
 // (needed because x was clamped to expHi above, so tmp is finite here)
130  RegReal32 infinity;
131  AssignReg<IS, ipReal32>::act(infinity, std::numeric_limits<ipReal32>::infinity());
132  RegMaskReal32 inEqualsToInfinity;
133  IsEqualReg<IS, ipReal32>::act(in, infinity, inEqualsToInfinity);
134  IfElseReg<IS, ipReal32>::act(inEqualsToInfinity, infinity, tmp, tmp);
135 
136  // - if in is NaN, out must be NaN
137  RegReal32 nan;
138  RegMaskReal32 inIsNotNaN;
139  AssignReg<IS, ipReal32>::act(nan, std::numeric_limits<ipReal32>::quiet_NaN());
140  // in is not NaN if and only if (in == in) is true
141  IsEqualReg<IS, ipReal32>::act(in, in, inIsNotNaN);
 // Keep tmp where the input is not NaN and substitute NaN elsewhere; this is
 // the only write to out.
142  IfElseReg<IS, ipReal32>::act(inIsNotNaN, tmp, nan, out);
143  }
144 };
145 
148 
149 } // end of namespace detail
150 } // end of namespace simd
151 } // end of namespace ipsdk
152 
153 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_EXPREG_H__
template structure which is specialized to implement the computation of a polynomial of degree 8 applied...
Definition: PolynomReg.h:43
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Definition: MinRegDecl.h:29
template structure which is specialized to implement the minimum computation on 2 scalars or 2 regist...
Definition: SubReg.h:39
Definition: IsEqualRegDecl.h:35
template structure which is specialized to implement the computation of exponential function on a sca...
Definition: ExpReg.h:39
IsEqualReg<eInstructionSet::domain instructionSet, typename T, typename Enable=void> structure...
template structure which is specialized to implement the maximum computation on 2 scalars or 2 regist...
template structure which is specialized to implement the computation of value rounded to closest even...
Definition: FloorReg.h:36
Definition: CastReg.h:30
RegType class.
Definition: ShiftLeftReg.h:30
Definition: MaxRegDecl.h:29
RegMaskType class.
Definition of import/export macro for library.
template structure which is specialized to implement the arithmetic addition on 2 scalars or 2 regist...
Definition: AddReg.h:37
Definition: IfElseReg.h:33
Definition: MulReg.h:39
Definition: RegMaskType.h:29
Definition: BitwiseCastReg.h:29
Definition: RegType.h:29
Definition: AssignRegDecl.h:31
float ipReal32
Base types definition.
Definition: BaseTypes.h:56