IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
CastPack.h
Go to the documentation of this file.
1 // CastPack.h:
3 // ------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_CASTPACK_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_CASTPACK_H__
17 
29 
30 #include <boost/type_traits/is_signed.hpp>
31 
32 namespace ipsdk {
33 namespace simd {
34 namespace detail {
35 
38 
// Partial specialization of CastPack for eIS_Avx512, enabled when TIn and
// TOut are both 1 byte wide (see the enable_if_c condition).
// NOTE(review): this listing omits several source lines (the embedded
// numbering skips, e.g. 42 -> 44 -> 46 -> 49), so the member signatures and
// most statements are not visible here; consult the original header.
39 template <typename TIn, typename TOut>
40 struct CastPack<eInstructionSet::eIS_Avx512, TIn, TOut,
41  typename boost::enable_if_c<sizeof(TIn)==1 && sizeof(TOut)==1>::type>
42 {
44  IPSDK_FORCEINLINE
46  {
49  return out;
50  }
51 
52  static
53  IPSDK_FORCEINLINE
54  void
57  {
59  }
60 
61  static
62  IPSDK_FORCEINLINE
63  void
66  {
// Stores into in._val[0] the result of MaskBlendReg::act applied to
// inMask._val[0], zeros and ones (the declarations of `in`, `zeros` and
// `ones` are on lines missing from this listing).
70  in._val[0] = MaskBlendReg<eInstructionSet::eIS_Avx512, TIn>::act(inMask._val[0], zeros, ones);
72  }
73 };
74 
// Partial specialization of CastPack for eIS_Avx512, enabled when TIn and
// TOut are both 2 bytes wide (see the enable_if_c condition).
// NOTE(review): member signatures and most statements are on lines missing
// from this listing (the embedded numbering skips); consult the original
// header.
75 template <typename TIn, typename TOut>
76 struct CastPack<eInstructionSet::eIS_Avx512, TIn, TOut,
77  typename boost::enable_if_c<sizeof(TIn)==2 && sizeof(TOut)==2>::type>
78 {
80  IPSDK_FORCEINLINE
82  {
86  return out;
87  }
88 
89  static
90  IPSDK_FORCEINLINE
91  void
94  {
97  }
98 
99  static
100  IPSDK_FORCEINLINE
101  void
104  {
// One MaskBlendReg::act call per register index (0 and 1): each result is
// stored in in._val[i] from inMask._val[i], zeros and ones.
108  in._val[0] = MaskBlendReg<eInstructionSet::eIS_Avx512, TIn>::act(inMask._val[0], zeros, ones);
109  in._val[1] = MaskBlendReg<eInstructionSet::eIS_Avx512, TIn>::act(inMask._val[1], zeros, ones);
110 
113  }
114 };
115 
// Specialization enabled when TIn and TOut are both 4 bytes wide (see the
// enable_if_c condition).
// NOTE(review): the `struct CastPack<...` header line (embedded line 117),
// the member signatures and most statements are missing from this listing;
// consult the original header.
116 template <typename TIn, typename TOut>
118  typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==4>::type>
119 {
121  IPSDK_FORCEINLINE
123  {
129  return out;
130  }
131 
132  static
133  IPSDK_FORCEINLINE
134  void
137  {
142  }
143 
144  static
145  IPSDK_FORCEINLINE
146  void
149  {
// One MaskBlendReg::act call per register index (0 to 3): each result is
// stored in in._val[i] from inMask._val[i], zeros and ones.
153  in._val[0] = MaskBlendReg<eInstructionSet::eIS_Avx512, TIn>::act(inMask._val[0], zeros, ones);
154  in._val[1] = MaskBlendReg<eInstructionSet::eIS_Avx512, TIn>::act(inMask._val[1], zeros, ones);
155  in._val[2] = MaskBlendReg<eInstructionSet::eIS_Avx512, TIn>::act(inMask._val[2], zeros, ones);
156  in._val[3] = MaskBlendReg<eInstructionSet::eIS_Avx512, TIn>::act(inMask._val[3], zeros, ones);
161  }
162 };
163 
// Specialization enabled when TIn and TOut are both 8 bytes wide (see the
// enable_if_c condition).
// NOTE(review): the `struct CastPack<...` header line (embedded line 165),
// the member signatures and most statements are missing from this listing;
// consult the original header.
164 template <typename TIn, typename TOut>
166  typename boost::enable_if_c<sizeof(TIn)==8 && sizeof(TOut)==8>::type>
167 {
169  IPSDK_FORCEINLINE
171  {
181  return out;
182  }
183 
184  static
185  IPSDK_FORCEINLINE
186  void
189  {
198  }
199 
200  static
201  IPSDK_FORCEINLINE
202  void
205  {
// One MaskBlendReg::act call per register index (0 to 7): each result is
// stored in in._val[i] from inMask._val[i], zeros and ones.
209  in._val[0] = MaskBlendReg<eInstructionSet::eIS_Avx512, TIn>::act(inMask._val[0], zeros, ones);
210  in._val[1] = MaskBlendReg<eInstructionSet::eIS_Avx512, TIn>::act(inMask._val[1], zeros, ones);
211  in._val[2] = MaskBlendReg<eInstructionSet::eIS_Avx512, TIn>::act(inMask._val[2], zeros, ones);
212  in._val[3] = MaskBlendReg<eInstructionSet::eIS_Avx512, TIn>::act(inMask._val[3], zeros, ones);
213  in._val[4] = MaskBlendReg<eInstructionSet::eIS_Avx512, TIn>::act(inMask._val[4], zeros, ones);
214  in._val[5] = MaskBlendReg<eInstructionSet::eIS_Avx512, TIn>::act(inMask._val[5], zeros, ones);
215  in._val[6] = MaskBlendReg<eInstructionSet::eIS_Avx512, TIn>::act(inMask._val[6], zeros, ones);
216  in._val[7] = MaskBlendReg<eInstructionSet::eIS_Avx512, TIn>::act(inMask._val[7], zeros, ones);
225  }
226 };
227 
// Specialization enabled when TIn is 1 byte and TOut is 2 bytes wide (see
// the enable_if_c condition).
// NOTE(review): the `struct CastPack<...` header line and the member
// signatures are missing from this listing; consult the original header.
228 template <typename TIn, typename TOut>
230  typename boost::enable_if_c<sizeof(TIn)==1 && sizeof(TOut)==2>::type>
231 {
232  static IPSDK_FORCEINLINE
235  {
// CastReg::act takes one input register (in._val[0]) and fills two output
// registers (out._val[0], out._val[1]); the result pack is then returned.
237  CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(in._val[0], out._val[0], out._val[1]);
238  return out;
239  }
240 
241  static
242  IPSDK_FORCEINLINE
243  void
246  {
// Same register conversion as above, writing into `out` instead of
// returning a pack.
247  CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(in._val[0], out._val[0], out._val[1]);
248  }
249 };
250 
// Specialization enabled when TIn is 2 bytes and TOut is 4 bytes wide (see
// the enable_if_c condition).
// NOTE(review): the `struct CastPack<...` header line and the member
// signatures are missing from this listing; consult the original header.
251 template <typename TIn, typename TOut>
253  typename boost::enable_if_c<sizeof(TIn)==2 && sizeof(TOut)==4>::type>
254 {
255  static IPSDK_FORCEINLINE
258  {
// Each of the two input registers is passed to CastReg::act together with
// two output registers: in[0] -> out[0..1], in[1] -> out[2..3].
260  CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(in._val[0], out._val[0], out._val[1]);
261  CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(in._val[1], out._val[2], out._val[3]);
262  return out;
263  }
264 
265  static
266  IPSDK_FORCEINLINE
267  void
270  {
// Same register conversions as above, writing into `out` instead of
// returning a pack.
271  CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(in._val[0], out._val[0], out._val[1]);
272  CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(in._val[1], out._val[2], out._val[3]);
273  }
274 };
275 
// Specialization enabled when TIn is 1 byte and TOut is 4 bytes wide (see
// the enable_if_c condition). The visible statements convert in two stages
// through the intermediate type T16 = UpperType<TIn>::Type.
// NOTE(review): the `struct CastPack<...` header line, the member
// signatures, the declarations of `in16`/`out`, and the heads of the calls
// in the second overload are missing from this listing; consult the
// original header.
276 template <typename TIn, typename TOut>
278  typename boost::enable_if_c<sizeof(TIn)==1 && sizeof(TOut)==4>::type>
279 {
280  static IPSDK_FORCEINLINE
283  {
284  typedef typename UpperType<TIn>::Type T16;
// Stage 1: TIn -> T16, one input register into two intermediate registers.
286  CastReg<eInstructionSet::eIS_Avx512, TIn, T16>::act(in._val[0], in16._val[0], in16._val[1]);
// Stage 2: T16 -> TOut, each intermediate register into two output registers.
288  CastReg<eInstructionSet::eIS_Avx512, T16, TOut>::act(in16._val[0], out._val[0], out._val[1]);
289  CastReg<eInstructionSet::eIS_Avx512, T16, TOut>::act(in16._val[1], out._val[2], out._val[3]);
290  return out;
291  }
292 
293  static
294  IPSDK_FORCEINLINE
295  void
298  {
299  typedef typename UpperType<TIn>::Type T16;
// Only the argument lists of three calls are visible below; the argument
// pattern matches the two-stage conversion of the first overload.
302  in._val[0], in16._val[0], in16._val[1]);
303 
305  in16._val[0], out._val[0], out._val[1]);
307  in16._val[1], out._val[2], out._val[3]);
308  }
309 };
310 
// Specialization enabled when TIn is 4 bytes and TOut is 8 bytes wide (see
// the enable_if_c condition).
// NOTE(review): the `struct CastPack<...` header line, the member
// signatures and the definition of the `Cast` alias are missing from this
// listing; consult the original header.
311 template <typename TIn, typename TOut>
313  typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==8>::type>
314 {
315  static IPSDK_FORCEINLINE
318  {
// Each of the four input registers is passed to Cast::act together with two
// output registers: in[i] -> out[2*i], out[2*i+1].
321  Cast::act(in._val[0], out._val[0], out._val[1]);
322  Cast::act(in._val[1], out._val[2], out._val[3]);
323  Cast::act(in._val[2], out._val[4], out._val[5]);
324  Cast::act(in._val[3], out._val[6], out._val[7]);
325  return out;
326  }
327 
328  static
329  IPSDK_FORCEINLINE
330  void
333  {
// Same register conversions as above, writing into `out` instead of
// returning a pack.
335  Cast::act(in._val[0], out._val[0], out._val[1]);
336  Cast::act(in._val[1], out._val[2], out._val[3]);
337  Cast::act(in._val[2], out._val[4], out._val[5]);
338  Cast::act(in._val[3], out._val[6], out._val[7]);
339  }
340 };
341 
// Partial specialization of CastPack for eIS_Avx512, enabled when TIn is at
// most 2 bytes wide and TOut is 8 bytes wide (see the enable_if_c
// condition).
// NOTE(review): the member signatures, the body of the first overload and
// the declaration of `packInt32` are missing from this listing; consult the
// original header.
342 template <typename TIn, typename TOut>
343 struct CastPack<eInstructionSet::eIS_Avx512, TIn, TOut,
344  typename boost::enable_if_c<sizeof(TIn) <= 2 && sizeof(TOut)==8>::type>
345 {
346  static IPSDK_FORCEINLINE
349  {
353  }
354 
355  static
356  IPSDK_FORCEINLINE
357  void
360  {
// Two chained pack casts through an ipInt32 intermediate:
// TIn -> ipInt32 (into packInt32), then ipInt32 -> TOut (into out).
362  CastPack<eInstructionSet::eIS_Avx512, TIn, ipInt32>::act(in, packInt32);
363  CastPack<eInstructionSet::eIS_Avx512, ipInt32, TOut>::act(packInt32, out);
364  }
365 };
366 
// Partial specialization of CastPack for eIS_Avx512, enabled when TIn is
// 2 bytes and TOut is 1 byte wide (see the enable_if_c condition).
// Both overloads pass the two input registers in._val[0] and in._val[1] to
// a single CastReg::act call whose third argument is out._val[0].
367 template <typename TIn, typename TOut>
368 struct CastPack<eInstructionSet::eIS_Avx512, TIn, TOut,
369  typename boost::enable_if_c<sizeof(TIn)==2 && sizeof(TOut)==1>::type>
370 {
// Returning overload: builds a local output pack, fills out._val[0] and
// returns the pack by value.
371  static IPSDK_FORCEINLINE
372  BasePack<ePackType::ePT_Avx512, TOut>
373  act(const BasePack<ePackType::ePT_Avx512, TIn>& in)
374  {
375  BasePack<ePackType::ePT_Avx512, TOut> out;
376  CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(
377  in._val[0], in._val[1], out._val[0]);
378  return out;
379  }
380 
// Output-parameter overload: same conversion, written into the caller's
// `out` pack.
381  static
382  IPSDK_FORCEINLINE
383  void
384  act(const BasePack<ePackType::ePT_Avx512, TIn>& in,
385  BasePack<ePackType::ePT_Avx512, TOut>& out)
386  {
387  CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(
388  in._val[0], in._val[1], out._val[0]);
389  }
390 };
391 
// Partial specialization of CastPack for eIS_Avx512, enabled when TIn is
// 4 bytes and TOut is 2 bytes wide (see the enable_if_c condition).
// Input registers are consumed in pairs by CastReg::act:
// in[0], in[1] -> out[0] and in[2], in[3] -> out[1].
392 template <typename TIn, typename TOut>
393 struct CastPack<eInstructionSet::eIS_Avx512, TIn, TOut,
394  typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==2>::type>
395 {
// Returning overload: builds a local output pack, fills its two registers
// and returns the pack by value.
396  static IPSDK_FORCEINLINE
397  BasePack<ePackType::ePT_Avx512, TOut>
398  act(const BasePack<ePackType::ePT_Avx512, TIn>& in)
399  {
400  BasePack<ePackType::ePT_Avx512, TOut> out;
401  CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(
402  in._val[0], in._val[1], out._val[0]);
403  CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(
404  in._val[2], in._val[3], out._val[1]);
405  return out;
406  }
407 
// Output-parameter overload: same conversions, written into the caller's
// `out` pack.
408  static
409  IPSDK_FORCEINLINE
410  void
411  act(const BasePack<ePackType::ePT_Avx512, TIn>& in,
412  BasePack<ePackType::ePT_Avx512, TOut>& out)
413  {
414  CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(
415  in._val[0], in._val[1], out._val[0]);
416  CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(
417  in._val[2], in._val[3], out._val[1]);
418  }
419 };
420 
// Partial specialization of CastPack for eIS_Avx512, enabled when TIn is
// 4 bytes and TOut is 1 byte wide (see the enable_if_c condition).
// The conversion runs in two stages through T16 = UpperType<TOut>::Type:
// a pack-level cast TIn -> T16, then a register-level cast T16 -> TOut.
421 template <typename TIn, typename TOut>
422 struct CastPack<eInstructionSet::eIS_Avx512, TIn, TOut,
423  typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==1>::type>
424 {
// Returning overload: the result pack is built locally and returned by
// value.
425  static IPSDK_FORCEINLINE
426  BasePack<ePackType::ePT_Avx512, TOut>
427  act(const BasePack<ePackType::ePT_Avx512, TIn>& in)
428  {
429  typedef typename UpperType<TOut>::Type T16;
430  BasePack<ePackType::ePT_Avx512, T16> in16;
// Stage 1: delegate to the CastPack specialization for TIn -> T16.
431  CastPack<eInstructionSet::eIS_Avx512, TIn, T16>::act(in, in16);
432  BasePack<ePackType::ePT_Avx512, TOut> out;
// Stage 2: the two intermediate registers are passed to one CastReg::act
// call whose third argument is out._val[0].
433  CastReg<eInstructionSet::eIS_Avx512, T16, TOut>::act(
434  in16._val[0], in16._val[1], out._val[0]);
435  return out;
436  }
437 
// Output-parameter overload: same two-stage conversion, written into the
// caller's `out` pack.
438  static
439  IPSDK_FORCEINLINE
440  void
441  act(const BasePack<ePackType::ePT_Avx512, TIn>& in,
442  BasePack<ePackType::ePT_Avx512, TOut>& out)
443  {
444  typedef typename UpperType<TOut>::Type T16;
445  BasePack<ePackType::ePT_Avx512, T16> in16;
446  CastPack<eInstructionSet::eIS_Avx512, TIn, T16>::act(in, in16);
447  CastReg<eInstructionSet::eIS_Avx512, T16, TOut>::act(
448  in16._val[0], in16._val[1], out._val[0]);
449  }
450 };
451 /*
452 template <>
453 struct CastPack<eInstructionSet::eIS_Avx512, ipReal32, ipUInt64>
454 {
455  static IPSDK_FORCEINLINE
456  BasePack<ePackType::ePT_Avx512, ipUInt64>
457  act(const BasePack<ePackType::ePT_Avx512, ipReal32>& in)
458  {
459  return CastPack<eInstructionSet::eIS_Avx512, ipUInt32, ipUInt64>::act(
460  CastPack<eInstructionSet::eIS_Avx512, ipReal32, ipUInt32>::act(in));
461  }
462 
463  static IPSDK_FORCEINLINE
464  void
465  act(const BasePack<ePackType::ePT_Avx512, ipReal32>& in,
466  BasePack<ePackType::ePT_Avx512, ipUInt64>& out)
467  {
468  BasePack<ePackType::ePT_Avx512, ipUInt32> interm;
469  CastPack<eInstructionSet::eIS_Avx512, ipReal32, ipUInt32>::act(in, interm);
470  CastPack<eInstructionSet::eIS_Avx512, ipUInt32, ipUInt64>::act(interm, out);
471  }
472 };*/
473 
// Full specialization of CastPack for eIS_Avx512 converting ipReal64 packs
// to ipReal32 packs. The eight input registers are consumed in pairs by
// CastReg::act, each pair filling one of the four output registers:
// in[2*i], in[2*i+1] -> out[i].
474 template <>
475 struct CastPack<eInstructionSet::eIS_Avx512, ipReal64, ipReal32>
476 {
// Returning overload: builds a local output pack, fills its four registers
// and returns the pack by value.
477  static IPSDK_FORCEINLINE
478  BasePack<ePackType::ePT_Avx512, ipReal32>
479  act(const BasePack<ePackType::ePT_Avx512, ipReal64>& in)
480  {
481  BasePack<ePackType::ePT_Avx512, ipReal32> out;
482  typedef CastReg<eInstructionSet::eIS_Avx512, ipReal64, ipReal32> Cast;
483 
484  Cast::act(in._val[0], in._val[1], out._val[0]);
485  Cast::act(in._val[2], in._val[3], out._val[1]);
486  Cast::act(in._val[4], in._val[5], out._val[2]);
487  Cast::act(in._val[6], in._val[7], out._val[3]);
488  return out;
489  }
490 
// Output-parameter overload: same conversions, written into the caller's
// `out` pack.
491  static
492  IPSDK_FORCEINLINE
493  void
494  act(const BasePack<ePackType::ePT_Avx512, ipReal64>& in,
495  BasePack<ePackType::ePT_Avx512, ipReal32>& out)
496  {
497  typedef CastReg<eInstructionSet::eIS_Avx512, ipReal64, ipReal32> Cast;
498  Cast::act(in._val[0], in._val[1], out._val[0]);
499  Cast::act(in._val[2], in._val[3], out._val[1]);
500  Cast::act(in._val[4], in._val[5], out._val[2]);
501  Cast::act(in._val[6], in._val[7], out._val[3]);
502  }
503 };
504 
// Partial specialization of CastPack for eIS_Avx512 converting ipReal64
// packs to any integral TOut that is 4 bytes wide (see the enable_if_c
// condition). The eight input registers are consumed in pairs by
// CastReg::act, each pair filling one of the four output registers:
// in[2*i], in[2*i+1] -> out[i].
505 template <typename TOut>
506 struct CastPack<eInstructionSet::eIS_Avx512, ipReal64, TOut,
507  typename boost::enable_if_c<
508  sizeof(TOut)==4 &&
509  boost::is_integral<TOut>::value
510  >::type
511 >
512 {
// Returning overload: builds a local output pack, fills its four registers
// and returns the pack by value.
513  static IPSDK_FORCEINLINE
514  BasePack<ePackType::ePT_Avx512, TOut>
515  act(const BasePack<ePackType::ePT_Avx512, ipReal64>& in)
516  {
517  BasePack<ePackType::ePT_Avx512, TOut> out;
518  typedef CastReg<eInstructionSet::eIS_Avx512, ipReal64, TOut> Cast;
519 
520  Cast::act(in._val[0], in._val[1], out._val[0]);
521  Cast::act(in._val[2], in._val[3], out._val[1]);
522  Cast::act(in._val[4], in._val[5], out._val[2]);
523  Cast::act(in._val[6], in._val[7], out._val[3]);
524  return out;
525  }
526 
// Output-parameter overload: same conversions, written into the caller's
// `out` pack.
527  static
528  IPSDK_FORCEINLINE
529  void
530  act(const BasePack<ePackType::ePT_Avx512, ipReal64>& in,
531  BasePack<ePackType::ePT_Avx512, TOut>& out)
532  {
533  typedef CastReg<eInstructionSet::eIS_Avx512, ipReal64, TOut> Cast;
534  Cast::act(in._val[0], in._val[1], out._val[0]);
535  Cast::act(in._val[2], in._val[3], out._val[1]);
536  Cast::act(in._val[4], in._val[5], out._val[2]);
537  Cast::act(in._val[6], in._val[7], out._val[3]);
538  }
539 };
540 
// Partial specialization of CastPack for eIS_Avx512 converting ipReal64
// packs to any integral TOut narrower than 4 bytes (see the enable_if_c
// condition). The conversion is chained through an ipInt32 pack:
// ipReal64 -> ipInt32, then ipInt32 -> TOut.
// NOTE(review): the intermediate type is ipInt32 regardless of the
// signedness of TOut - confirm this is intended for unsigned outputs.
541 template <typename TOut>
542 struct CastPack<eInstructionSet::eIS_Avx512, ipReal64, TOut,
543  typename boost::enable_if_c<
544  sizeof(TOut)<4 &&
545  boost::is_integral<TOut>::value
546  >::type
547 >
548 {
// Returning overload: the result pack is built locally and returned by
// value.
549  static IPSDK_FORCEINLINE
550  BasePack<ePackType::ePT_Avx512, TOut>
551  act(const BasePack<ePackType::ePT_Avx512, ipReal64>& in)
552  {
553  BasePack<ePackType::ePT_Avx512, TOut> out;
554  BasePack<ePackType::ePT_Avx512, ipInt32> inInt32;
555 
556  CastPack<eInstructionSet::eIS_Avx512, ipReal64, ipInt32>::act(in, inInt32);
557  CastPack<eInstructionSet::eIS_Avx512, ipInt32, TOut>::act(inInt32, out);
558  return out;
559  }
560 
// Output-parameter overload: same chained conversion, written into the
// caller's `out` pack.
561  static
562  IPSDK_FORCEINLINE
563  void
564  act(const BasePack<ePackType::ePT_Avx512, ipReal64>& in,
565  BasePack<ePackType::ePT_Avx512, TOut>& out)
566  {
567  BasePack<ePackType::ePT_Avx512, ipInt32> inInt32;
568 
569  CastPack<eInstructionSet::eIS_Avx512, ipReal64, ipInt32>::act(in, inInt32);
570  CastPack<eInstructionSet::eIS_Avx512, ipInt32, TOut>::act(inInt32, out);
571  }
572 };
573 
576 
577 } // end of namespace detail
578 } // end of namespace simd
579 } // end of namespace ipsdk
580 
581 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_CASTPACK_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Definition: CastPack.h:33
structure containing set of masks for vectorized operations
Definition: BaseMaskPackDecl.h:29
Definition: AllBitsToOneReg.h:33
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
utility functions for array operations
(including foundation and byte and word instructions)
Definition: InstructionSetTypes.h:51
Definition: CastReg.h:30
BasePack class; defines a set of scalars (for instruction set "standard") or registers (for all other...
template structures used for immediate type promotion
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
template structure UpperType<typename T>; its typedef Type gives the type just upper to T...
Definition: UpperType.h:42
Definition of import/export macro for library.
Definition: AllBitsToZeroReg.h:31
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
Definition: MaskBlendReg.h:30
structure used to retrieve AVX512 type associated to a base type
Definition: Avx512Types.h:36
structure containing intrinsic registers used to store vectorized data
Definition: BasePackDecl.h:29