IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
SinReg.h
Go to the documentation of this file.
1 // SinReg.h:
3 // -------------------
4 //
15 
16 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_SINREG_H__
17 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_SINREG_H__
18 
21 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/AbsReg.h>
22 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/AddReg.h>
25 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/PolynomReg.h>
35 
36 namespace ipsdk {
37 namespace simd {
38 namespace detail {
39 
42 
// Partial specialization of SinReg for ipReal32 (float) registers: computes the
// sine of every element of a register of the instruction set IS.
//
// Template parameters:
//   IS    - instruction set domain used for every register operation below.
//   ISFma - second instruction set domain; it is not referenced by any line
//           visible in this listing (presumably it is forwarded to the
//           polynomial evaluation whose call heads are missing - TODO confirm).
//
// NOTE(review): this text is an extracted documentation listing. The listing
// numbers at the start of each line skip 48, 70, 84, 87, 88, 90, 93, 97, 113,
// 133 and 153, so the statements on those lines are not visible here. Each gap
// is flagged below; consult the real header before relying on this code.
44 template <eInstructionSet::domain IS, eInstructionSet::domain ISFma>
45 struct SinReg<IS, ISFma, ipReal32>
46 {
    // Value-returning overload: forwards to the two-argument overload below and
    // returns the register it filled.
    // NOTE(review): the return type (listing line 48) is not visible; since the
    // local `out` is returned, it is presumably
    // `typename RegType<IS, ipReal32>::Type` - TODO confirm.
47  static IPSDK_FORCEINLINE
49  act(const typename RegType<IS, ipReal32>::Type& in)
50  {
51  typename RegType<IS, ipReal32>::Type out;
52  act(in, out);
53  return out;
54  }
55 
    // Out-parameter overload holding the actual computation.
    //   in  - input register (read only).
    //   out - receives the result; it is also used as scratch storage for the
    //         value x * 4/Pi during the computation.
    // The comments inside refer to the Cephes sources as the origin of the
    // constants and of the range-reduction scheme.
56  static IPSDK_FORCEINLINE
57  void
58  act(const typename RegType<IS, ipReal32>::Type& in,
59  typename RegType<IS, ipReal32>::Type& out)
60  {
61  typedef typename RegType<IS, ipReal32>::Type RegReal32;
62  typedef typename RegType<IS, ipInt32>::Type RegInt32;
63 
    // Work on a local copy so that the input register is left untouched.
64  RegReal32 x = in;
65  RegReal32 xmm1, xmm2, xmm3, sign_bit;
    // xmm2 is assigned again by the MulReg call in the range-reduction step
    // before it is read anywhere in the visible code.
66  AssignReg<IS, ipReal32>::act(xmm2, 0.0f);
67  RegInt32 emm0, emm2;
    // Keep a copy of the raw input; it is masked down to its sign bit below.
68  sign_bit = x;
69  /* take the absolute value */
    // NOTE(review): listing line 70 is missing; presumably it replaces x by its
    // absolute value (AbsReg.h is included by this header) - TODO confirm.
71  /* extract the sign bit (upper one) */
    // Build the 0x80000000 mask as an integer register, reinterpret it as a
    // real register and AND it with the input copy.
72  RegInt32 signMaskInt32;
73  AssignReg<IS, ipInt32>::act(signMaskInt32, 0x80000000);
74  RegReal32 signMask;
75  BitwiseCastReg<IS, ipInt32, ipReal32>::act(signMaskInt32, signMask);
76  BitwiseAndReg<IS, ipReal32>::act(sign_bit, signMask, sign_bit);
77 
78  /* scale by 4/Pi */
79  RegReal32 cephesFOPI;
80  AssignReg<IS, ipReal32>::act(cephesFOPI, 1.27323954473516f); // 4 / M_PI
81  MulReg<IS, ipReal32>::act(x, cephesFOPI, out);
82 
83  /* store the integer part of out in mm0 */
    // NOTE(review): listing line 84 is missing; presumably it converts `out`
    // to integers and stores them in emm2, which is otherwise never
    // initialised in the visible code - TODO confirm.
85  /* j=(j+1) & (~1) (see the cephes sources) */
86  RegInt32 zero, one, invOne, two;
    // NOTE(review): listing lines 87, 88 and 90 are missing; presumably they
    // assign 0, 1 and 2 to `zero`, `one` and `two` - TODO confirm. Only the
    // assignment of `invOne` is visible.
89  AssignReg<IS, ipInt32>::act(invOne, ~1);
91  AddReg<IS, ipInt32>::act(emm2, one, emm2);
92  BitwiseAndReg<IS, ipInt32>::act(emm2, invOne, emm2);
    // NOTE(review): listing line 93 is missing; `out` is read again as a real
    // register in the range reduction below, so this line presumably writes
    // emm2 converted back to reals into `out` - TODO confirm.
94 
95  /* get the swap sign flag */
96  RegInt32 four;
    // NOTE(review): listing line 97 is missing; presumably it assigns 4 to
    // `four` - TODO confirm. If so, the shift by 29 below moves that bit
    // (bit 2) to bit 31, the position selected by the 0x80000000 mask above.
98  BitwiseAndReg<IS, ipInt32>::act(emm2, four, emm0);
99  ShiftLeftReg<IS, ipInt32>::act(emm0, 29, emm0);
100  /* get the polynom selection mask
101  there is one polynom for 0 <= x <= Pi/4
102  and another one for Pi/4<x<=Pi/2
103 
104  Both branches will be computed.
105  */
    // emm2 is reduced to the bit selected by `two`, compared with `zero`, and
    // the resulting comparison mask is converted back into an integer register.
106  BitwiseAndReg<IS, ipInt32>::act(emm2, two, emm2);
107  typename RegMaskType<IS, ipInt32>::Type emm2Mask;
108  IsEqualReg<IS, ipInt32>::act(emm2, zero, emm2Mask);
109  CastReg<IS, ipInt32, ipInt32>::act(emm2Mask, emm2);
110 
111  RegReal32 swap_sign_bit, poly_mask;
112  BitwiseCastReg<IS, ipInt32, ipReal32>::act(emm0, swap_sign_bit);
    // NOTE(review): listing line 113 is missing; poly_mask is read at the end
    // of the function but never written in the visible code, so this line
    // presumably reinterprets emm2 as poly_mask - TODO confirm.
    // Combine the sign of the input with the swap-sign flag.
114  BitwiseXOrReg<IS, ipReal32>::act(sign_bit, swap_sign_bit, sign_bit);
115 
116  /* The magic pass: "Extended precision modular arithmetic"
117  x = ((x - out * DP1) - out * DP2) - out * DP3; */
    // The three constants are stored already negated, so the subtractions of
    // the formula above are carried out as additions. Their magnitudes sum to
    // approximately Pi/4.
118  RegReal32 minusCephesDP1, minusCephesDP2, minusCephesDP3;
119  AssignReg<IS, ipReal32>::act(minusCephesDP1, -0.78515625f);
120  AssignReg<IS, ipReal32>::act(minusCephesDP2, -2.4187564849853515625e-4f);
121  AssignReg<IS, ipReal32>::act(minusCephesDP3, -3.77489497744594108e-8f);
122 
123  MulReg<IS, ipReal32>::act(out, minusCephesDP1, xmm1);
124  MulReg<IS, ipReal32>::act(out, minusCephesDP2, xmm2);
125  MulReg<IS, ipReal32>::act(out, minusCephesDP3, xmm3);
126  AddReg<IS, ipReal32>::act(x, xmm1, x);
127  AddReg<IS, ipReal32>::act(x, xmm2, x);
128  AddReg<IS, ipReal32>::act(x, xmm3, x);
129 
130  /* Evaluate the first polynom (0 <= x <= Pi/4) */
    // z = x * x is shared by both polynomial evaluations.
131  RegReal32 z;
132  MulReg<IS, ipReal32>::act(x, x, z);
    // NOTE(review): listing line 133 is missing; the four lines below are the
    // argument list of a call whose head is not visible (presumably a
    // PolynomReg evaluation of the coscof_* coefficients at z, written to
    // `out` - TODO confirm).
134  z,
135  4.166664568298827E-002f, // coscof_p2
136  -1.388731625493765E-003f, // coscof_p1
137  2.443315711809948E-005f, // coscof_p0
138  out);
139 
    // out = out * z * z - z * 0.5 + 1
140  MulReg<IS, ipReal32>::act(out, z, out);
141  MulReg<IS, ipReal32>::act(out, z, out);
142 
143  RegReal32 halfOne, oneReal32;
144  AssignReg<IS, ipReal32>::act(halfOne, 0.5f);
145  AssignReg<IS, ipReal32>::act(oneReal32, 1.0f);
146  RegReal32 tmp;
147  MulReg<IS, ipReal32>::act(z, halfOne, tmp);
148  SubReg<IS, ipReal32>::act(out, tmp, out);
149  AddReg<IS, ipReal32>::act(out, oneReal32, out);
150 
151  /* Evaluate the second polynom (sincof_* coefficients). NOTE(review): the range originally written here, "Pi/4 <= x <= 0", is empty as stated; presumably the second range named in the selection-mask comment above was meant - confirm. */
152  RegReal32 y2;
    // NOTE(review): listing line 153 is missing; the four lines below are the
    // argument list of a call whose head is not visible (presumably a
    // PolynomReg evaluation of the sincof_* coefficients at z, written to
    // `y2` - TODO confirm).
154  z,
155  -1.6666654611E-1f, // sincof_p2
156  8.3321608736E-3f, // sincof_p1
157  -1.9515295891E-4f, // sincof_p0
158  y2);
    // y2 = y2 * z * x + x
159  MulReg<IS, ipReal32>::act(y2, z, y2);
160  MulReg<IS, ipReal32>::act(y2, x, y2);
161  AddReg<IS, ipReal32>::act(y2, x, y2);
162 
163  /* select the correct result from the two polynoms */
    // y2 is masked with poly_mask and `out` is masked through BitwiseAndNotReg
    // with the same mask; the two are then summed. NOTE(review): this
    // presumably keeps y2 where the mask is set and `out` elsewhere, which
    // depends on the operand order convention of BitwiseAndNotReg - confirm.
164  BitwiseAndReg<IS, ipReal32>::act(poly_mask, y2, y2);
165  BitwiseAndNotReg<IS, ipReal32>::act(out, poly_mask, out);
166  AddReg<IS, ipReal32>::act(out, y2, out);
167  /* update the sign */
    // XOR with sign_bit flips the sign of the result wherever that bit is set.
168  BitwiseXOrReg<IS, ipReal32>::act(out, sign_bit, out);
169  }
170 };
171 
174 
175 } // end of namespace detail
176 } // end of namespace simd
177 } // end of namespace ipsdk
178 
179 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_COMMON_SINREG_H__
template structure which is specialized to implement the computation of a polynom of degree 8 applied...
Definition: PolynomReg.h:43
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Definition: BitwiseAndNotReg.h:30
Definition: SubReg.h:39
Definition: IsEqualRegDecl.h:35
IsEqualReg<eInstructionSet::domain instructionSet, typename T, typename Enable=void> structure...
Definition: CastReg.h:30
RegType class.
Definition: ShiftLeftReg.h:30
Definition: BitwiseXOrReg.h:30
Definition of import/export macro for library.
template structure which is specialized to implement the arithmetic addition on 2 scalars or 2 regist...
Definition: AddReg.h:37
template structure which is specialized to implement the computation of sine on a scalar or a registe...
Definition: SinReg.h:40
Definition: MulReg.h:39
Definition: RegMaskType.h:29
Definition: BitwiseCastReg.h:29
template structure which is specialized to implement the computation of abs function on a scalar or a...
Definition: AbsReg.h:46
Definition: RegType.h:29
Definition: BitwiseAndReg.h:30
Definition: AssignRegDecl.h:31
float ipReal32
Base types definition.
Definition: BaseTypes.h:56