IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
LogReg.h
Go to the documentation of this file.
1 // LogReg.h:
3 // -------------------
4 //
16 
17 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_LOGREG_H__
18 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_LOGREG_H__
19 
23 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/AddReg.h>
24 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/FrexpReg.h>
28 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/PolynomReg.h>
37 
38 namespace ipsdk {
39 namespace simd {
40 namespace detail {
41 
44 
// Partial specialization of LogReg for ipReal32 registers of instruction set
// IS (ISFma is a second instruction-set parameter; it is not referenced by
// any statement visible in this listing). The reference text at the end of
// this listing describes LogReg as computing the natural logarithm.
//
// NOTE(review): every line of this listing is prefixed with the line number
// of the original header, and several numbers are skipped inside this struct
// (50, 77, 80, 103, 139, 140). The statements on those lines are not visible
// here; each gap is flagged below. Confirm against the real header before
// relying on this text.
46 template <eInstructionSet::domain IS, eInstructionSet::domain ISFma>
47 struct LogReg<IS, ISFma, ipReal32>
48 {
    // Convenience overload: forwards to the two-argument act() below and
    // returns the computed register by value.
    // NOTE(review): the return type (listing line 50) is not visible here.
49  static IPSDK_FORCEINLINE
51  act(const typename RegType<IS, ipReal32>::Type& in)
52  {
53  typename RegType<IS, ipReal32>::Type out;
54  act(in, out);
55  return out;
56  }
57 
    // Main overload: reads `in` and writes the result into `out`.
    // The intermediate result is built in `tmp`, then special input values
    // are patched in by the IfElseReg chain at the end.
58  static IPSDK_FORCEINLINE
59  void
60  act(const typename RegType<IS, ipReal32>::Type& in,
61  typename RegType<IS, ipReal32>::Type& out)
62  {
63  typedef typename RegType<IS, ipReal32>::Type RegReal32;
64  typedef typename RegMaskType<IS, ipReal32>::Type RegMaskReal32;
65  typedef typename RegType<IS, ipInt32>::Type RegInt32;
66 
    // 0x00800000 read as a 32-bit float is the smallest positive normalized
    // value (assuming IEEE-754 binary32 for ipReal32 -- TODO confirm).
    // NOTE(review): the pointer cast below type-puns an ipInt32 object as
    // ipReal32, which is not allowed by the C++ aliasing rules; a memcpy
    // based conversion would be the conforming alternative.
67  const ipInt32 scalarMinNormPos = 0x00800000;
68  RegReal32 minNormPos;
69  AssignReg<IS, ipReal32>::act(minNormPos, *reinterpret_cast<const ipReal32*>(&scalarMinNormPos));
70 
    // Work on a copy of the input; presumably MaxReg stores max(x, minNormPos)
    // into its third argument, so x never goes below minNormPos.
71  RegReal32 x = in;
72  MaxReg<IS, ipReal32>::act(x, minNormPos, x); /* cut off denormalized stuff */
73 
    // Presumably splits x into a mantissa (written back into x) and an
    // integer exponent (eInt32) -- verify against FrexpReg.
74  RegInt32 eInt32;
75  FrexpReg<IS, ipReal32>::act(x, x, eInt32);
    // NOTE(review): listing line 77 is missing; no visible statement assigns
    // `e` before it is read below. Presumably eInt32 is converted into `e`
    // there -- confirm.
76  RegReal32 e;
78 
    // NOTE(review): listing line 80 is missing; no visible statement assigns
    // `one` before it is read below -- confirm it is set to 1.0f there.
79  RegReal32 one, cephesSqrtHalf;
81  AssignReg<IS, ipReal32>::act(cephesSqrtHalf, 0.707106781186547524f);
82 
83  /* part2:
84  if( x < SQRTHF ) {
85  e -= 1;
86  x = x + x - 1.0;
87  } else { x = x - 1.0; }
88  */
    // Branch-free form of the pseudo-code above:
    //   mask       = (x < cephesSqrtHalf)
    //   tmp        = x   & mask   (x where the mask is set, presumably 0 elsewhere)
    //   x          = x - one
    //   oneAndMask = one & mask
    //   e          = e - oneAndMask
    //   x          = x + tmp
89  RegReal32 regFromMask, tmp, oneAndMask;
90  RegMaskReal32 mask;
91  IsLessReg<IS, ipReal32>::act(x, cephesSqrtHalf, mask);
92  CastReg<IS, ipReal32, ipReal32>::act(mask, regFromMask);
93  BitwiseAndReg<IS, ipReal32>::act(x, regFromMask, tmp);
94  SubReg<IS, ipReal32>::act(x, one, x);
95  BitwiseAndReg<IS, ipReal32>::act(one, regFromMask, oneAndMask);
96  SubReg<IS, ipReal32>::act(e, oneAndMask, e);
97  AddReg<IS, ipReal32>::act(x, tmp, x);
98 
    // x2 = x * x
99  RegReal32 x2;
100  MulReg<IS, ipReal32>::act(x, x, x2);
101 
    // NOTE(review): listing line 103 is missing; the call that takes the
    // arguments below is not visible. Presumably it is the degree-8
    // polynomial helper from PolynomReg.h, evaluated at x with the nine
    // coefficients labelled p8..p0 and storing its result in y -- confirm.
102  RegReal32 y;
104  x,
105  +3.3333331174E-1f, // cephes_log_p8
106  -2.4999993993E-1f, // cephes_log_p7
107  +2.0000714765E-1f, // cephes_log_p6
108  -1.6668057665E-1f, // cephes_log_p5
109  +1.4249322787E-1f, // cephes_log_p4
110  -1.2420140846E-1f, // cephes_log_p3
111  1.1676998740E-1f, // cephes_log_p2
112  -1.1514610310E-1f, // cephes_log_p1
113  7.0376836292E-2f, // cephes_log_p0
114  y);
115 
    // cephesLogQ1 + cephesLogQ2 = 0.69314718056, i.e. ln(2) split into a
    // small correction term and a larger leading term.
116  RegReal32 cephesLogQ1, halfOne, cephesLogQ2;
117  AssignReg<IS, ipReal32>::act(cephesLogQ1, -2.12194440e-4f);
118  AssignReg<IS, ipReal32>::act(halfOne, .5f);
119  AssignReg<IS, ipReal32>::act(cephesLogQ2, 0.693359375f);
120 
    // y = y * x * x2
121  MulReg<IS, ipReal32>::act(y, x, y);
122  MulReg<IS, ipReal32>::act(y, x2, y);
123 
    // y = y + e * cephesLogQ1
124  MulReg<IS, ipReal32>::act(e, cephesLogQ1, tmp);
125  AddReg<IS, ipReal32>::act(y, tmp, y);
126 
    // y = y - 0.5 * x2
127  MulReg<IS, ipReal32>::act(x2, halfOne, tmp);
128  SubReg<IS, ipReal32>::act(y, tmp, y);
129 
    // tmp = (x + y) + e * cephesLogQ2 ; tmp now holds the result for
    // inputs that need no special handling.
130  MulReg<IS, ipReal32>::act(e, cephesLogQ2, tmp);
131  AddReg<IS, ipReal32>::act(x, y, x);
132  AddReg<IS, ipReal32>::act(x, tmp, tmp);
133 
    // Special-value handling, tested on the original input `in`.
    // NOTE(review): listing lines 139-140 are missing; no visible statement
    // assigns `eps` or `minusEps` before they are compared below -- confirm
    // their values in the real header.
134  RegReal32 inf, minusInf, minusEps, eps, nan;
135  RegMaskReal32 inIsNull, inIsNeg, inIsNan, inIsInf;
136  AssignReg<IS, ipReal32>::act(inf, std::numeric_limits<ipReal32>::infinity());
137  AssignReg<IS, ipReal32>::act(minusInf, -std::numeric_limits<ipReal32>::infinity());
138  AssignReg<IS, ipReal32>::act(nan, std::numeric_limits<ipReal32>::quiet_NaN());
    // Masks: in < eps, in < minusEps, in == +inf, in != in (the last one is
    // the usual self-inequality test for NaN).
141  IsLessReg<IS, ipReal32>::act(in, eps, inIsNull);
142  IsLessReg<IS, ipReal32>::act(in, minusEps, inIsNeg);
143  IsEqualReg<IS, ipReal32>::act(in, inf, inIsInf);
144  IsNotEqualReg<IS, ipReal32>::act(in, in, inIsNan);
    // Presumably IfElseReg(mask, a, b, dst) writes a where mask is set and b
    // elsewhere. The selections are applied in sequence, so a later one
    // overrides an earlier one: -inf for inIsNull, then NaN for inIsNeg,
    // then +inf for inIsInf, then NaN for inIsNan; the last writes `out`.
145  IfElseReg<IS, ipReal32>::act(inIsNull, minusInf, tmp, tmp);
146  IfElseReg<IS, ipReal32>::act(inIsNeg, nan, tmp, tmp);
147  IfElseReg<IS, ipReal32>::act(inIsInf, inf, tmp, tmp);
148  IfElseReg<IS, ipReal32>::act(inIsNan, nan, tmp, out);
149  }
150 };
151 
154 
155 } // end of namespace detail
156 } // end of namespace simd
157 } // end of namespace ipsdk
158 
159 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_LOGREG_H__
template structure which is specialized to implement the computation of a polynomial of degree 8 applied...
Definition: PolynomReg.h:43
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Definition: SubReg.h:39
Definition: IsEqualRegDecl.h:35
IsEqualReg<eInstructionSet::domain instructionSet, typename T, typename Enable=void> structure...
template structure which is specialized to implement the maximum computation on 2 scalars or 2 regist...
template structure which is specialized to implement the decomposition of input floating value f into...
Definition: FrexpReg.h:37
int32_t ipInt32
Base types definition.
Definition: BaseTypes.h:52
Definition: CastReg.h:30
IPSDK_FORCEINLINE PackT min(const PackT &in1, const PackT &in2)
returns the minimum of 2 packs
Definition: min.h:40
RegType class.
Definition: MaxRegDecl.h:29
RegMaskType class.
Definition of import/export macro for library.
template structure which is specialized to implement the arithmetic addition on 2 scalars or 2 regist...
Definition: AddReg.h:37
Definition: IfElseReg.h:33
Definition: MulReg.h:39
Definition: IsNotEqualRegDecl.h:30
Definition: RegMaskType.h:29
Definition: IsLessRegDecl.h:30
Definition: RegType.h:29
Definition: BitwiseAndReg.h:30
template structure which is specialized to implement the computation of natural logarithm on a scalar...
Definition: LogReg.h:40
Definition: AssignRegDecl.h:31
float ipReal32
Base types definition.
Definition: BaseTypes.h:56