IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
CastPack.h
Go to the documentation of this file.
1 // CastPack.h:
3 // ------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_CASTPACK_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_CASTPACK_H__
17 
26 
27 #include <boost/type_traits/is_signed.hpp>
28 
29 namespace ipsdk {
30 namespace simd {
31 namespace detail {
32 
35 
// CastPack specialization for the AVX2 instruction set, enabled when both
// TIn and TOut are 1 byte wide.
// NOTE(review): this listing has lost several source lines (return types,
// parameter lists and the bodies of the first two act overloads); only the
// mask overload is fully readable here - confirm against the real header.
36 template <typename TIn, typename TOut>
37 struct CastPack<eInstructionSet::eIS_Avx2, TIn, TOut,
38  typename boost::enable_if_c<sizeof(TIn)==1 && sizeof(TOut)==1>::type>
39 {
   // Value-returning overload (signature and body missing from this listing).
41  IPSDK_FORCEINLINE
43  {
46  return out;
47  }
48 
   // void overload (parameter list and body missing from this listing).
49  static
50  IPSDK_FORCEINLINE
51  void
54  {
56  }
57 
   // Mask overload: converts the one mask entry inMask._val[0] into
   // out._val[0] through CastReg.
58  static
59  IPSDK_FORCEINLINE
60  void
61  act(const BaseMaskPack<ePackType::ePT_Avx, TIn>& inMask,
63  {
64  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(inMask._val[0], out._val[0]);
65  }
66 };
67 
// CastPack specialization for the AVX2 instruction set, enabled when both
// TIn and TOut are 2 bytes wide.
// NOTE(review): this listing has lost several source lines (return types,
// parameter lists and the bodies of the first two act overloads); only the
// mask overload is fully readable here - confirm against the real header.
68 template <typename TIn, typename TOut>
69 struct CastPack<eInstructionSet::eIS_Avx2, TIn, TOut,
70  typename boost::enable_if_c<sizeof(TIn)==2 && sizeof(TOut)==2>::type>
71 {
   // Value-returning overload (signature and body missing from this listing).
73  IPSDK_FORCEINLINE
75  {
79  return out;
80  }
81 
   // void overload (parameter list and body missing from this listing).
82  static
83  IPSDK_FORCEINLINE
84  void
87  {
90  }
91 
   // Mask overload: converts mask entries 0 and 1 one by one, each
   // inMask._val[i] going to out._val[i] through CastReg.
92  static
93  IPSDK_FORCEINLINE
94  void
95  act(const BaseMaskPack<ePackType::ePT_Avx, TIn>& inMask,
97  {
98  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(inMask._val[0], out._val[0]);
99  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(inMask._val[1], out._val[1]);
100  }
101 };
102 
// CastPack specialization for the AVX2 instruction set, enabled when both
// TIn and TOut are 4 bytes wide.
// NOTE(review): this listing has lost several source lines (return type,
// second parameters and the bodies of the first two act overloads); only the
// mask overload is fully readable here - confirm against the real header.
103 template <typename TIn, typename TOut>
104 struct CastPack<eInstructionSet::eIS_Avx2, TIn, TOut,
105  typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==4>::type>
106 {
    // Value-returning overload taking the input pack (body missing from
    // this listing apart from the final return).
108  IPSDK_FORCEINLINE
109  act(const BasePack<ePackType::ePT_Avx, TIn>& in)
110  {
116  return out;
117  }
118 
    // void overload taking the input pack (output parameter and body
    // missing from this listing).
119  static
120  IPSDK_FORCEINLINE
121  void
122  act(const BasePack<ePackType::ePT_Avx, TIn>& in,
124  {
129  }
130 
    // Mask overload: converts mask entries 0..3 one by one, each
    // inMask._val[i] going to out._val[i] through CastReg.
131  static
132  IPSDK_FORCEINLINE
133  void
134  act(const BaseMaskPack<ePackType::ePT_Avx, TIn>& inMask,
136  {
137  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(inMask._val[0], out._val[0]);
138  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(inMask._val[1], out._val[1]);
139  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(inMask._val[2], out._val[2]);
140  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(inMask._val[3], out._val[3]);
141  }
142 };
143 
// CastPack specialization for the AVX2 instruction set, enabled when both
// TIn and TOut are 8 bytes wide.
// NOTE(review): this listing has lost several source lines (return type,
// second parameters and the bodies of the first two act overloads); only the
// mask overload is fully readable here - confirm against the real header.
144 template <typename TIn, typename TOut>
145 struct CastPack<eInstructionSet::eIS_Avx2, TIn, TOut,
146  typename boost::enable_if_c<sizeof(TIn)==8 && sizeof(TOut)==8>::type>
147 {
    // Value-returning overload taking the input pack (body missing from
    // this listing apart from the final return).
149  IPSDK_FORCEINLINE
150  act(const BasePack<ePackType::ePT_Avx, TIn>& in)
151  {
161  return out;
162  }
163 
    // void overload taking the input pack (output parameter and body
    // missing from this listing).
164  static
165  IPSDK_FORCEINLINE
166  void
167  act(const BasePack<ePackType::ePT_Avx, TIn>& in,
169  {
178  }
179 
    // Mask overload: converts mask entries 0..7 one by one, each
    // inMask._val[i] going to out._val[i] through CastReg.
180  static
181  IPSDK_FORCEINLINE
182  void
183  act(const BaseMaskPack<ePackType::ePT_Avx, TIn>& inMask,
185  {
186  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(inMask._val[0], out._val[0]);
187  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(inMask._val[1], out._val[1]);
188  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(inMask._val[2], out._val[2]);
189  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(inMask._val[3], out._val[3]);
190  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(inMask._val[4], out._val[4]);
191  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(inMask._val[5], out._val[5]);
192  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(inMask._val[6], out._val[6]);
193  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(inMask._val[7], out._val[7]);
194  }
195 };
196 
// CastPack specialization for the AVX2 instruction set, enabled for a
// widening cast from 1-byte TIn to 2-byte TOut. Both overloads expand the
// single input entry in._val[0] into the two output entries out._val[0] and
// out._val[1] with one CastReg call.
// NOTE(review): the return type / declaration of `out` in the first overload
// and the output parameter of the second are missing from this listing.
197 template <typename TIn, typename TOut>
198 struct CastPack<eInstructionSet::eIS_Avx2, TIn, TOut,
199  typename boost::enable_if_c<sizeof(TIn)==1 && sizeof(TOut)==2>::type>
200 {
    // Value-returning overload.
201  static IPSDK_FORCEINLINE
203  act(const BasePack<ePackType::ePT_Avx, TIn>& in)
204  {
206  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(in._val[0], out._val[0], out._val[1]);
207  return out;
208  }
209 
    // In-place overload writing into `out`.
210  static
211  IPSDK_FORCEINLINE
212  void
213  act(const BasePack<ePackType::ePT_Avx, TIn>& in,
215  {
216  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(in._val[0], out._val[0], out._val[1]);
217  }
218 };
219 
// CastPack specialization for the AVX2 instruction set, enabled for a
// widening cast from 2-byte TIn to 4-byte TOut. Each of the two input
// entries is expanded into a pair of output entries:
// in._val[0] -> out._val[0..1], in._val[1] -> out._val[2..3].
// NOTE(review): the return type / declaration of `out` in the first overload
// and the output parameter of the second are missing from this listing.
220 template <typename TIn, typename TOut>
221 struct CastPack<eInstructionSet::eIS_Avx2, TIn, TOut,
222  typename boost::enable_if_c<sizeof(TIn)==2 && sizeof(TOut)==4>::type>
223 {
    // Value-returning overload.
224  static IPSDK_FORCEINLINE
226  act(const BasePack<ePackType::ePT_Avx, TIn>& in)
227  {
229  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(in._val[0], out._val[0], out._val[1]);
230  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(in._val[1], out._val[2], out._val[3]);
231  return out;
232  }
233 
    // In-place overload writing into `out`.
234  static
235  IPSDK_FORCEINLINE
236  void
237  act(const BasePack<ePackType::ePT_Avx, TIn>& in,
239  {
240  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(in._val[0], out._val[0], out._val[1]);
241  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(in._val[1], out._val[2], out._val[3]);
242  }
243 };
244 
// CastPack specialization for the AVX2 instruction set, enabled for a
// widening cast from 1-byte TIn to 4-byte TOut. The cast is done in two
// steps through the intermediate type T16 = UpperType<TIn>::Type:
//   in._val[0]   -> in16._val[0..1]
//   in16._val[0] -> out._val[0..1],  in16._val[1] -> out._val[2..3]
// NOTE(review): the declarations of `in16` / `out`, the return type, the
// output parameter and (in the second overload) the heads of the CastReg
// calls are missing from this listing; the second overload presumably
// mirrors the first - confirm against the real header.
245 template <typename TIn, typename TOut>
246 struct CastPack<eInstructionSet::eIS_Avx2, TIn, TOut,
247  typename boost::enable_if_c<sizeof(TIn)==1 && sizeof(TOut)==4>::type>
248 {
    // Value-returning overload.
249  static IPSDK_FORCEINLINE
251  act(const BasePack<ePackType::ePT_Avx, TIn>& in)
252  {
253  typedef typename UpperType<TIn>::Type T16;
    // Step 1: widen the input entry into two intermediate entries.
255  CastReg<eInstructionSet::eIS_Avx2, TIn, T16>::act(in._val[0], in16._val[0], in16._val[1]);
    // Step 2: widen each intermediate entry into two output entries.
257  CastReg<eInstructionSet::eIS_Avx2, T16, TOut>::act(in16._val[0], out._val[0], out._val[1]);
258  CastReg<eInstructionSet::eIS_Avx2, T16, TOut>::act(in16._val[1], out._val[2], out._val[3]);
259  return out;
260  }
261 
    // In-place overload; only the argument lists of its three calls survive
    // in this listing.
262  static
263  IPSDK_FORCEINLINE
264  void
265  act(const BasePack<ePackType::ePT_Avx, TIn>& in,
267  {
268  typedef typename UpperType<TIn>::Type T16;
271  in._val[0], in16._val[0], in16._val[1]);
272 
274  in16._val[0], out._val[0], out._val[1]);
276  in16._val[1], out._val[2], out._val[3]);
277  }
278 };
279 
// CastPack specialization for the AVX2 instruction set, enabled for a
// widening cast from 4-byte TIn to 8-byte TOut. Each of the four input
// entries is expanded into a pair of output entries:
// in._val[i] -> out._val[2*i], out._val[2*i + 1] for i = 0..3.
// NOTE(review): the declaration of `Cast` is missing from this listing; it is
// presumably a typedef of CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>, as
// in the ipReal64 specializations of this file - confirm. The return type,
// the declaration of `out` and the output parameter are missing as well.
280 template <typename TIn, typename TOut>
281 struct CastPack<eInstructionSet::eIS_Avx2, TIn, TOut,
282  typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==8>::type>
283 {
    // Value-returning overload.
284  static IPSDK_FORCEINLINE
286  act(const BasePack<ePackType::ePT_Avx, TIn>& in)
287  {
290  Cast::act(in._val[0], out._val[0], out._val[1]);
291  Cast::act(in._val[1], out._val[2], out._val[3]);
292  Cast::act(in._val[2], out._val[4], out._val[5]);
293  Cast::act(in._val[3], out._val[6], out._val[7]);
294  return out;
295  }
296 
    // In-place overload writing into `out`.
297  static
298  IPSDK_FORCEINLINE
299  void
300  act(const BasePack<ePackType::ePT_Avx, TIn>& in,
302  {
304  Cast::act(in._val[0], out._val[0], out._val[1]);
305  Cast::act(in._val[1], out._val[2], out._val[3]);
306  Cast::act(in._val[2], out._val[4], out._val[5]);
307  Cast::act(in._val[3], out._val[6], out._val[7]);
308  }
309 };
310 
// CastPack specialization for the AVX2 instruction set, enabled for a
// widening cast from a TIn of at most 2 bytes to an 8-byte TOut. The visible
// in-place overload chains two other CastPack casts through an ipInt32 pack:
// TIn -> ipInt32, then ipInt32 -> TOut.
// NOTE(review): the first overload is truncated in this listing (return type
// and everything after the `packInt32 =` initializer are missing), as are the
// output parameter and the declaration of `packInt32` in the second overload.
311 template <typename TIn, typename TOut>
312 struct CastPack<eInstructionSet::eIS_Avx2, TIn, TOut,
313  typename boost::enable_if_c<sizeof(TIn) <= 2 && sizeof(TOut)==8>::type>
314 {
    // Value-returning overload (body truncated in this listing).
315  static IPSDK_FORCEINLINE
317  act(const BasePack<ePackType::ePT_Avx, TIn>& in)
318  {
319  const BasePack<ePackType::ePT_Avx, ipInt32> packInt32 =
322  }
323 
    // In-place overload writing into `out`.
324  static
325  IPSDK_FORCEINLINE
326  void
327  act(const BasePack<ePackType::ePT_Avx, TIn>& in,
329  {
331  CastPack<eInstructionSet::eIS_Avx2, TIn, ipInt32>::act(in, packInt32);
332  CastPack<eInstructionSet::eIS_Avx2, ipInt32, TOut>::act(packInt32, out);
333  }
334 };
336 template <typename TIn, typename TOut>
337 struct CastPack<eInstructionSet::eIS_Avx2, TIn, TOut,
338  typename boost::enable_if_c<sizeof(TIn)==2 && sizeof(TOut)==1>::type>
339 {
340  static IPSDK_FORCEINLINE
341  BasePack<ePackType::ePT_Avx, TOut>
342  act(const BasePack<ePackType::ePT_Avx, TIn>& in)
343  {
344  BasePack<ePackType::ePT_Avx, TOut> out;
345  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(
346  in._val[0], in._val[1], out._val[0]);
347  return out;
348  }
349 
350  static
351  IPSDK_FORCEINLINE
352  void
353  act(const BasePack<ePackType::ePT_Avx, TIn>& in,
354  BasePack<ePackType::ePT_Avx, TOut>& out)
355  {
356  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(
357  in._val[0], in._val[1], out._val[0]);
358  }
359 };
360 
361 template <typename TIn, typename TOut>
362 struct CastPack<eInstructionSet::eIS_Avx2, TIn, TOut,
363  typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==2>::type>
364 {
365  static IPSDK_FORCEINLINE
366  BasePack<ePackType::ePT_Avx, TOut>
367  act(const BasePack<ePackType::ePT_Avx, TIn>& in)
368  {
369  BasePack<ePackType::ePT_Avx, TOut> out;
370  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(
371  in._val[0], in._val[1], out._val[0]);
372  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(
373  in._val[2], in._val[3], out._val[1]);
374  return out;
375  }
376 
377  static
378  IPSDK_FORCEINLINE
379  void
380  act(const BasePack<ePackType::ePT_Avx, TIn>& in,
381  BasePack<ePackType::ePT_Avx, TOut>& out)
382  {
383  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(
384  in._val[0], in._val[1], out._val[0]);
385  CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(
386  in._val[2], in._val[3], out._val[1]);
387  }
388 };
389 
390 template <typename TIn, typename TOut>
391 struct CastPack<eInstructionSet::eIS_Avx2, TIn, TOut,
392  typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==1>::type>
393 {
394  static IPSDK_FORCEINLINE
395  BasePack<ePackType::ePT_Avx, TOut>
396  act(const BasePack<ePackType::ePT_Avx, TIn>& in)
397  {
398  typedef typename UpperType<TOut>::Type T16;
399  BasePack<ePackType::ePT_Avx, T16> in16;
400  CastPack<eInstructionSet::eIS_Avx2, TIn, T16>::act(in, in16);
401  BasePack<ePackType::ePT_Avx, TOut> out;
402  CastReg<eInstructionSet::eIS_Avx2, T16, TOut>::act(
403  in16._val[0], in16._val[1], out._val[0]);
404  return out;
405  }
406 
407  static
408  IPSDK_FORCEINLINE
409  void
410  act(const BasePack<ePackType::ePT_Avx, TIn>& in,
411  BasePack<ePackType::ePT_Avx, TOut>& out)
412  {
413  typedef typename UpperType<TOut>::Type T16;
414  BasePack<ePackType::ePT_Avx, T16> in16;
415  CastPack<eInstructionSet::eIS_Avx2, TIn, T16>::act(in, in16);
416  CastReg<eInstructionSet::eIS_Avx2, T16, TOut>::act(
417  in16._val[0], in16._val[1], out._val[0]);
418  }
419 };
420 /*
421 template <>
422 struct CastPack<eInstructionSet::eIS_Avx2, ipReal32, ipUInt64>
423 {
424  static IPSDK_FORCEINLINE
425  BasePack<ePackType::ePT_Avx, ipUInt64>
426  act(const BasePack<ePackType::ePT_Avx, ipReal32>& in)
427  {
428  return CastPack<eInstructionSet::eIS_Avx2, ipUInt32, ipUInt64>::act(
429  CastPack<eInstructionSet::eIS_Avx2, ipReal32, ipUInt32>::act(in));
430  }
431 
432  static IPSDK_FORCEINLINE
433  void
434  act(const BasePack<ePackType::ePT_Avx, ipReal32>& in,
435  BasePack<ePackType::ePT_Avx, ipUInt64>& out)
436  {
437  BasePack<ePackType::ePT_Avx, ipUInt32> interm;
438  CastPack<eInstructionSet::eIS_Avx2, ipReal32, ipUInt32>::act(in, interm);
439  CastPack<eInstructionSet::eIS_Avx2, ipUInt32, ipUInt64>::act(interm, out);
440  }
441 };*/
442 
443 template <>
444 struct CastPack<eInstructionSet::eIS_Avx2, ipReal64, ipReal32>
445 {
446  static IPSDK_FORCEINLINE
447  BasePack<ePackType::ePT_Avx, ipReal32>
448  act(const BasePack<ePackType::ePT_Avx, ipReal64>& in)
449  {
450  BasePack<ePackType::ePT_Avx, ipReal32> out;
451  typedef CastReg<eInstructionSet::eIS_Avx2, ipReal64, ipReal32> Cast;
452 
453  Cast::act(in._val[0], in._val[1], out._val[0]);
454  Cast::act(in._val[2], in._val[3], out._val[1]);
455  Cast::act(in._val[4], in._val[5], out._val[2]);
456  Cast::act(in._val[6], in._val[7], out._val[3]);
457  return out;
458  }
459 
460  static
461  IPSDK_FORCEINLINE
462  void
463  act(const BasePack<ePackType::ePT_Avx, ipReal64>& in,
464  BasePack<ePackType::ePT_Avx, ipReal32>& out)
465  {
466  typedef CastReg<eInstructionSet::eIS_Avx2, ipReal64, ipReal32> Cast;
467  Cast::act(in._val[0], in._val[1], out._val[0]);
468  Cast::act(in._val[2], in._val[3], out._val[1]);
469  Cast::act(in._val[4], in._val[5], out._val[2]);
470  Cast::act(in._val[6], in._val[7], out._val[3]);
471  }
472 };
473 
474 template <typename TOut>
475 struct CastPack<eInstructionSet::eIS_Avx2, ipReal64, TOut,
476  typename boost::enable_if_c<
477  sizeof(TOut)==4 &&
478  boost::is_integral<TOut>::value
479  >::type
480 >
481 {
482  static IPSDK_FORCEINLINE
483  BasePack<ePackType::ePT_Avx, TOut>
484  act(const BasePack<ePackType::ePT_Avx, ipReal64>& in)
485  {
486  BasePack<ePackType::ePT_Avx, TOut> out;
487  typedef CastReg<eInstructionSet::eIS_Avx2, ipReal64, TOut> Cast;
488 
489  Cast::act(in._val[0], in._val[1], out._val[0]);
490  Cast::act(in._val[2], in._val[3], out._val[1]);
491  Cast::act(in._val[4], in._val[5], out._val[2]);
492  Cast::act(in._val[6], in._val[7], out._val[3]);
493  return out;
494  }
495 
496  static
497  IPSDK_FORCEINLINE
498  void
499  act(const BasePack<ePackType::ePT_Avx, ipReal64>& in,
500  BasePack<ePackType::ePT_Avx, TOut>& out)
501  {
502  typedef CastReg<eInstructionSet::eIS_Avx2, ipReal64, TOut> Cast;
503  Cast::act(in._val[0], in._val[1], out._val[0]);
504  Cast::act(in._val[2], in._val[3], out._val[1]);
505  Cast::act(in._val[4], in._val[5], out._val[2]);
506  Cast::act(in._val[6], in._val[7], out._val[3]);
507  }
508 };
509 
510 template <typename TOut>
511 struct CastPack<eInstructionSet::eIS_Avx2, ipReal64, TOut,
512  typename boost::enable_if_c<
513  sizeof(TOut)<4 &&
514  boost::is_integral<TOut>::value
515  >::type
516 >
517 {
518  static IPSDK_FORCEINLINE
519  BasePack<ePackType::ePT_Avx, TOut>
520  act(const BasePack<ePackType::ePT_Avx, ipReal64>& in)
521  {
522  BasePack<ePackType::ePT_Avx, TOut> out;
523  BasePack<ePackType::ePT_Avx, ipInt32> inInt32;
524 
525  CastPack<eInstructionSet::eIS_Avx2, ipReal64, ipInt32>::act(in, inInt32);
526  CastPack<eInstructionSet::eIS_Avx2, ipInt32, TOut>::act(inInt32, out);
527  return out;
528  }
529 
530  static
531  IPSDK_FORCEINLINE
532  void
533  act(const BasePack<ePackType::ePT_Avx, ipReal64>& in,
534  BasePack<ePackType::ePT_Avx, TOut>& out)
535  {
536  BasePack<ePackType::ePT_Avx, ipInt32> inInt32;
537 
538  CastPack<eInstructionSet::eIS_Avx2, ipReal64, ipInt32>::act(in, inInt32);
539  CastPack<eInstructionSet::eIS_Avx2, ipInt32, TOut>::act(inInt32, out);
540  }
541 };
542 
545 
546 } // end of namespace detail
547 } // end of namespace simd
548 } // end of namespace ipsdk
549 
550 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_CASTPACK_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Definition: CastPack.h:33
structure containing set of masks for vectorized operations
Definition: BaseMaskPackDecl.h:29
BaseMaskPack class; defines a set of masks; the number of masks in this set depends on the type of th...
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
utility functions for array operations
Definition: CastReg.h:30
BasePack class; defines a set of scalars (for instruction set "standard") or registers (for all other...
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
template structure UpperType<typename T>; its typedef Type gives the type just upper to T...
Definition: UpperType.h:42
Advanced Vector Extensions 2.
Definition: InstructionSetTypes.h:48
Definition of import/export macro for library.
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
structure containing intrinsic registers used to store vectorized data
Definition: BasePackDecl.h:29