IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
SignedUnsignedOpReg.h
Go to the documentation of this file.
1 // SignedUnsignedOpReg.h:
3 // -------------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_SSE2_SIGNEDUNSIGNEDOPREG_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_SSE2_SIGNEDUNSIGNEDOPREG_H__
17 
20 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/SignedUnsignedOpReg.h>
25 #include <IPSDKUtil/InstructionSet/Comparison/detail/SSE2/IsGreaterEqualReg.h>
33 
34 namespace ipsdk {
35 namespace simd {
36 namespace detail {
37 
40 
// Partial specialization for a single element type T, selected by the
// trailing `/* signed */ false` template argument.
// NOTE(review): the line that names the specialized struct (listing line 44)
// is absent from this extracted listing. The sibling `/* signed */ true`
// specialization refers to SubReg in its comments, so this is presumably the
// subtraction helper for unsigned T -- confirm against the original header.
43 template<typename T>
45  T, /* signed */ false>
46 {
// The result uses the same SSE2 register type as both operands.
47  typedef typename Sse2Type<T>::Type result_type;
48 
// Value-returning overload: takes the two operand registers x and y.
// NOTE(review): the body (listing line 53) is missing from this listing.
49  static IPSDK_FORCEINLINE
50  result_type act(const typename Sse2Type<T>::Type& x,
51  const typename Sse2Type<T>::Type& y)
52  {
54  }
55 
// Out-parameter overload: same operands, result written to `out`.
// NOTE(review): the body (listing line 61) is missing from this listing.
56  static IPSDK_FORCEINLINE
57  void act(const typename Sse2Type<T>::Type& x,
58  const typename Sse2Type<T>::Type& y,
59  result_type& out)
60  {
62  }
63 };
64 
// Partial specialization for a single element type T, selected by the
// trailing `/* signed */ true` template argument.
// NOTE(review): the line that names the specialized struct (listing line 68)
// is absent from this extracted listing; the in-body comments
// (`return result_type(x - y)`, SubReg) indicate a subtraction whose result
// is expressed in the unsigned counterpart of T -- confirm against the
// original header.
67 template<typename T>
69  T, /* signed */ true>
70 {
// unsigned_type is the unsigned counterpart of T (boost::make_unsigned);
// the result register is the SSE2 register type for that unsigned type,
// while both operands are registers of the signed type T.
71  typedef typename boost::make_unsigned<T>::type unsigned_type;
72  typedef typename Sse2Type<unsigned_type>::Type result_type;
73 
// Value-returning overload.
// NOTE(review): the initializers of zero, yNeg, xNeg, yNegAndXNotNeg and one,
// as well as the head of the final return expression, are missing from this
// listing (listing lines 79, 82, 85, 88, 91, 93, 98-99, 101-102, 106).
// Judging by the names only, the code distinguishes the case "y is negative
// and x is not" from the general case -- TODO confirm.
74  static IPSDK_FORCEINLINE
75  result_type act(const typename Sse2Type<T>::Type& x,
76  const typename Sse2Type<T>::Type& y)
77  {
78  const typename Sse2Type<T>::Type zero =
80 
81  const typename Sse2Type<T>::Type yNeg =
83 
84  const typename Sse2Type<T>::Type xNeg =
86 
87  const typename Sse2Type<T>::Type yNegAndXNotNeg =
89 
90  const typename Sse2Type<T>::Type one =
92 
// yNegAndXNotNeg is the first argument of the (missing) final call, followed
// by the two candidate expressions described by the comments below.
94  yNegAndXNotNeg,
95 
96  // avoid the nasty two's complement case for y == min()
97  // return result_type(x) + result_type(-(y+1)) + 1;
100  x,
103  one),
104 
105  // return result_type(x - y);
107  //return SubReg<eInstructionSet::eIS_Sse2, unsigned_type>::act(x, y);
108  }
109 
// Out-parameter overload: same operands, result written to `out`.
// NOTE(review): several statements are missing from this listing (listing
// lines 116, 119, 122, 125, 133-134, 136-137, 139, 141), including the
// initializers of zero/one and the code that fills yNeg, xNeg, a_t and b.
110  static IPSDK_FORCEINLINE
111  void act(const typename Sse2Type<T>::Type& x,
112  const typename Sse2Type<T>::Type& y,
113  result_type& out)
114  {
115  const typename Sse2Type<T>::Type zero =
117 
118  const typename Sse2Type<T>::Type one =
120 
121  typename Sse2Type<T>::Type yNeg;
123 
124  typename Sse2Type<T>::Type xNeg;
126 
// Combines yNeg and xNeg through BitwiseAndNotReg into yNegAndXNotNeg.
// NOTE(review): which operand BitwiseAndNotReg negates is not visible here;
// the variable name implies "yNeg AND NOT xNeg" -- verify.
127  typename Sse2Type<T>::Type yNegAndXNotNeg;
128  BitwiseAndNotReg<eInstructionSet::eIS_Sse2, T>::act(yNeg, xNeg, yNegAndXNotNeg);
129 
// a_t is an intermediate in the signed type T; a and b are in unsigned_type.
130  typename Sse2Type<T>::Type a_t;
131  typename Sse2Type<unsigned_type>::Type a, b;
132 
// Converts the signed intermediate a_t to the unsigned register a.
135  cast<eInstructionSet::eIS_Sse2, T, unsigned_type>(a_t, a);
138 
140 
// Tail of the final call, which takes yNegAndXNotNeg together with a and b
// and writes `out`. NOTE(review): the callee (listing line 141) is missing;
// presumably a per-lane select between a and b -- TODO confirm.
142  yNegAndXNotNeg, a, b, out);
143  }
144 };
145 
// Partial specialization for two element types T1 (type of x) and T2 (type
// of y), selected by the trailing `/* signed */ false` template argument.
// NOTE(review): the line that names the specialized struct (listing line
// 149) is absent from this extracted listing. The sibling
// `/* signed */ true` specialization refers to AddReg in its comments, so
// this is presumably the addition helper -- confirm against the original
// header.
148 template<typename T1, typename T2>
150  T1, T2, /* signed */ false>
151 {
// The result uses the SSE2 register type of T2, i.e. the type of operand y.
152  typedef typename Sse2Type<T2>::Type result_type;
153 
// Value-returning overload: x is a register of T1, y a register of T2.
// NOTE(review): the body (listing line 158) is missing from this listing.
154  static IPSDK_FORCEINLINE
155  result_type act(const typename Sse2Type<T1>::Type& x,
156  const typename Sse2Type<T2>::Type& y)
157  {
159  }
160 
// Out-parameter overload: same operands, result written to `out`.
// NOTE(review): the body (listing line 166) is missing from this listing.
161  static IPSDK_FORCEINLINE
162  void act(const typename Sse2Type<T1>::Type& x,
163  const typename Sse2Type<T2>::Type& y,
164  result_type& out)
165  {
167  }
168 };
169 
// Partial specialization for two element types T1 (type of x) and T2 (type
// of y), selected by the trailing `/* signed */ true` template argument.
// NOTE(review): the line that names the specialized struct (listing line
// 173) is absent from this extracted listing; the in-body comments
// (`return T2(x) + y`, AddReg) indicate an addition whose result is
// expressed in T2 -- confirm against the original header.
172 template<typename T1, typename T2>
174  T1, T2, /* signed */ true>
175 {
// The result uses the SSE2 register type of T2, i.e. the type of operand y.
176  typedef typename Sse2Type<T2>::Type result_type;
177 
// Value-returning overload.
// NOTE(review): the initializers of zero, one, yNeg, minusYPlusOne,
// xGtThnMinusYPlusOne and the heads of the remaining expressions are missing
// from this listing (listing lines 183, 186, 189, 192-193, 196, 199, 203,
// 208-209, 213). The out-parameter overload below shows the same
// intermediate names with their computations visible.
178  static IPSDK_FORCEINLINE
179  result_type act(const typename Sse2Type<T1>::Type& x,
180  const typename Sse2Type<T2>::Type& y)
181  {
182  const typename Sse2Type<T1>::Type zero =
184 
185  const typename Sse2Type<T2>::Type one =
187 
188  const typename Sse2Type<T1>::Type yNeg =
190 
191  const typename Sse2Type<T1>::Type minusYPlusOne =
194 
195  const typename Sse2Type<T1>::Type xGtThnMinusYPlusOne =
197 
198  const typename Sse2Type<T1>::Type yNegAndXGtThnMinusYPlusOne =
200  yNeg, xGtThnMinusYPlusOne);
201 
202 
// yNegAndXGtThnMinusYPlusOne is the first argument of the (missing) final
// call, followed by the two candidate expressions described below.
204  yNegAndXGtThnMinusYPlusOne,
205 
206  // avoid the nasty two's complement edge case for y == min()
207  // return T2(x - T1(-(y+1)) - 1);
210  x, minusYPlusOne),
211  one),
212  // return T2(x) + y;
214  //return AddReg<eInstructionSet::eIS_Sse2, T2>::act(x, y);
215  }
216 
// Out-parameter overload: same operands, result written to `out`.
// NOTE(review): some statements are missing from this listing (listing lines
// 223, 226, 229, 242, 247, 250-251), including the initializers of zero/one,
// the code that fills yNeg and b, and the heads of two calls.
217  static IPSDK_FORCEINLINE
218  void act(const typename Sse2Type<T1>::Type& x,
219  const typename Sse2Type<T2>::Type& y,
220  result_type& out)
221  {
222  const typename Sse2Type<T1>::Type zero =
224 
225  const typename Sse2Type<T2>::Type one =
227 
228  typename Sse2Type<T1>::Type yNeg;
230 
// Computes y + one in T2, then negates it in place, i.e. -(y + one).
// Presumably `one` holds the value 1 (its initializer is not visible), which
// matches the -(y+1) form used in the comment of the overload above.
231  typename Sse2Type<T2>::Type minusYPlusOne_t2;
232  AddReg<eInstructionSet::eIS_Sse2, T2>::act(y, one, minusYPlusOne_t2);
233  UnaryMinusReg<eInstructionSet::eIS_Sse2, T2>::act(minusYPlusOne_t2, minusYPlusOne_t2);
234 
// Converts the negated value from T2 to T1 so it can be combined with x.
235  typename Sse2Type<T1>::Type minusYPlusOne;
236  cast<eInstructionSet::eIS_Sse2, T2, T1>(minusYPlusOne_t2, minusYPlusOne);
237 
// Compares x against minusYPlusOne with IsGreaterReg (x > -(y + one)).
238  typename Sse2Type<T1>::Type xGtThnMinusYPlusOne;
239  IsGreaterReg<eInstructionSet::eIS_Sse2, T1>::act(x, minusYPlusOne, xGtThnMinusYPlusOne);
240 
// Combines yNeg with the comparison result.
// NOTE(review): the callee (listing line 242) is missing; the name of the
// output implies a bitwise AND of the two -- TODO confirm.
241  typename Sse2Type<T1>::Type yNegAndXGtThnMinusYPlusOne;
243  yNeg, xGtThnMinusYPlusOne, yNegAndXGtThnMinusYPlusOne);
244 
// a = x - minusYPlusOne, computed in T1.
245  typename Sse2Type<T1>::Type a;
246  SubReg<eInstructionSet::eIS_Sse2, T1>::act(x, minusYPlusOne, a);
248 
// Tail of the final call, which takes yNegAndXGtThnMinusYPlusOne together
// with a and b and writes `out`. NOTE(review): the callee (listing line 251)
// and the computation of b are missing; presumably a per-lane select between
// a and b -- TODO confirm.
249  typename Sse2Type<T2>::Type b;
252  yNegAndXGtThnMinusYPlusOne, a, b, out);
253  }
254 };
255 
258 
259 } // end of namespace detail
260 } // end of namespace simd
261 } // end of namespace ipsdk
262 
263 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_SSE2_SIGNEDUNSIGNEDOPREG_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Definition: BitwiseAndNotReg.h:30
cast function; casts a Pack<instructionSet, TIn> to a Pack<instructionSet, TOut>
Definition: SubReg.h:39
specialization of ipsdk::simd::detail::SubReg structure for SSE2 instruction set
specialization of ipsdk::simd::detail::UnaryMinusReg structure for SSE2 instruction set ...
Definition: SignedUnsignedOpReg.h:69
Definition: IsGreaterRegDecl.h:30
Definition: BitwiseSelectReg.h:30
specialization of ipsdk::simd::detail::AddReg structure for SSE2 instruction set
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Predefined types for Sse2 instruction set management.
Predefined types associated to instruction set management.
specialization of BitwiseSelectReg structure for SSE2 instruction set
Definition of import/export macro for library.
template structure which is specialized to implement the arithmetic addition on 2 scalars or 2 regist...
Definition: AddReg.h:37
Definition: UnaryMinusReg.h:40
specialization of BitwiseAndReg structure for SSE2 instruction set
Streaming SIMD Extensions 2.
Definition: InstructionSetTypes.h:36
Definition: IsLessRegDecl.h:30
structure used to retrieve SSE2 type associated to a base type
Definition: Sse2Types.h:32
Definition: SignedUnsignedOpReg.h:51
Definition: BitwiseAndReg.h:30
Definition: AssignRegDecl.h:31