15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_CASTPACK_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_CASTPACK_H__ 30 #include <boost/type_traits/is_signed.hpp> 39 template <
typename TIn,
typename TOut>
41 typename boost::enable_if_c<sizeof(TIn)==1 && sizeof(TOut)==1>::type>
75 template <
typename TIn,
typename TOut>
77 typename boost::enable_if_c<sizeof(TIn)==2 && sizeof(TOut)==2>::type>
116 template <
typename TIn,
typename TOut>
118 typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==4>::type>
164 template <
typename TIn,
typename TOut>
166 typename boost::enable_if_c<sizeof(TIn)==8 && sizeof(TOut)==8>::type>
228 template <
typename TIn,
typename TOut>
230 typename boost::enable_if_c<sizeof(TIn)==1 && sizeof(TOut)==2>::type>
232 static IPSDK_FORCEINLINE
251 template <
typename TIn,
typename TOut>
253 typename boost::enable_if_c<sizeof(TIn)==2 && sizeof(TOut)==4>::type>
255 static IPSDK_FORCEINLINE
276 template <
typename TIn,
typename TOut>
278 typename boost::enable_if_c<sizeof(TIn)==1 && sizeof(TOut)==4>::type>
280 static IPSDK_FORCEINLINE
302 in._val[0], in16._val[0], in16._val[1]);
305 in16._val[0], out._val[0], out._val[1]);
307 in16._val[1], out._val[2], out._val[3]);
311 template <
typename TIn,
typename TOut>
313 typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==8>::type>
315 static IPSDK_FORCEINLINE
321 Cast::act(in._val[0], out._val[0], out._val[1]);
322 Cast::act(in._val[1], out._val[2], out._val[3]);
323 Cast::act(in._val[2], out._val[4], out._val[5]);
324 Cast::act(in._val[3], out._val[6], out._val[7]);
335 Cast::act(in._val[0], out._val[0], out._val[1]);
336 Cast::act(in._val[1], out._val[2], out._val[3]);
337 Cast::act(in._val[2], out._val[4], out._val[5]);
338 Cast::act(in._val[3], out._val[6], out._val[7]);
342 template <
typename TIn,
typename TOut>
344 typename boost::enable_if_c<sizeof(TIn) <= 2 && sizeof(TOut)==8>::type>
346 static IPSDK_FORCEINLINE
362 CastPack<eInstructionSet::eIS_Avx512, TIn, ipInt32>::act(in, packInt32);
363 CastPack<eInstructionSet::eIS_Avx512, ipInt32, TOut>::act(packInt32, out);
367 template <
typename TIn,
typename TOut>
369 typename boost::enable_if_c<sizeof(TIn)==2 && sizeof(TOut)==1>::type>
371 static IPSDK_FORCEINLINE
372 BasePack<ePackType::ePT_Avx512, TOut>
373 act(
const BasePack<ePackType::ePT_Avx512, TIn>& in)
375 BasePack<ePackType::ePT_Avx512, TOut> out;
376 CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(
377 in._val[0], in._val[1], out._val[0]);
384 act(
const BasePack<ePackType::ePT_Avx512, TIn>& in,
385 BasePack<ePackType::ePT_Avx512, TOut>& out)
387 CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(
388 in._val[0], in._val[1], out._val[0]);
392 template <
typename TIn,
typename TOut>
394 typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==2>::type>
396 static IPSDK_FORCEINLINE
397 BasePack<ePackType::ePT_Avx512, TOut>
398 act(
const BasePack<ePackType::ePT_Avx512, TIn>& in)
400 BasePack<ePackType::ePT_Avx512, TOut> out;
401 CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(
402 in._val[0], in._val[1], out._val[0]);
403 CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(
404 in._val[2], in._val[3], out._val[1]);
411 act(
const BasePack<ePackType::ePT_Avx512, TIn>& in,
412 BasePack<ePackType::ePT_Avx512, TOut>& out)
414 CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(
415 in._val[0], in._val[1], out._val[0]);
416 CastReg<eInstructionSet::eIS_Avx512, TIn, TOut>::act(
417 in._val[2], in._val[3], out._val[1]);
421 template <
typename TIn,
typename TOut>
423 typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==1>::type>
425 static IPSDK_FORCEINLINE
426 BasePack<ePackType::ePT_Avx512, TOut>
427 act(
const BasePack<ePackType::ePT_Avx512, TIn>& in)
429 typedef typename UpperType<TOut>::Type T16;
430 BasePack<ePackType::ePT_Avx512, T16> in16;
431 CastPack<eInstructionSet::eIS_Avx512, TIn, T16>::act(in, in16);
432 BasePack<ePackType::ePT_Avx512, TOut> out;
433 CastReg<eInstructionSet::eIS_Avx512, T16, TOut>::act(
434 in16._val[0], in16._val[1], out._val[0]);
441 act(
const BasePack<ePackType::ePT_Avx512, TIn>& in,
442 BasePack<ePackType::ePT_Avx512, TOut>& out)
444 typedef typename UpperType<TOut>::Type T16;
445 BasePack<ePackType::ePT_Avx512, T16> in16;
446 CastPack<eInstructionSet::eIS_Avx512, TIn, T16>::act(in, in16);
447 CastReg<eInstructionSet::eIS_Avx512, T16, TOut>::act(
448 in16._val[0], in16._val[1], out._val[0]);
477 static IPSDK_FORCEINLINE
478 BasePack<ePackType::ePT_Avx512, ipReal32>
479 act(
const BasePack<ePackType::ePT_Avx512, ipReal64>& in)
481 BasePack<ePackType::ePT_Avx512, ipReal32> out;
482 typedef CastReg<eInstructionSet::eIS_Avx512, ipReal64, ipReal32> Cast;
484 Cast::act(in._val[0], in._val[1], out._val[0]);
485 Cast::act(in._val[2], in._val[3], out._val[1]);
486 Cast::act(in._val[4], in._val[5], out._val[2]);
487 Cast::act(in._val[6], in._val[7], out._val[3]);
494 act(
const BasePack<ePackType::ePT_Avx512, ipReal64>& in,
495 BasePack<ePackType::ePT_Avx512, ipReal32>& out)
497 typedef CastReg<eInstructionSet::eIS_Avx512, ipReal64, ipReal32> Cast;
498 Cast::act(in._val[0], in._val[1], out._val[0]);
499 Cast::act(in._val[2], in._val[3], out._val[1]);
500 Cast::act(in._val[4], in._val[5], out._val[2]);
501 Cast::act(in._val[6], in._val[7], out._val[3]);
505 template <
typename TOut>
507 typename boost::enable_if_c<
509 boost::is_integral<TOut>::value
513 static IPSDK_FORCEINLINE
514 BasePack<ePackType::ePT_Avx512, TOut>
515 act(
const BasePack<ePackType::ePT_Avx512, ipReal64>& in)
517 BasePack<ePackType::ePT_Avx512, TOut> out;
518 typedef CastReg<eInstructionSet::eIS_Avx512, ipReal64, TOut> Cast;
520 Cast::act(in._val[0], in._val[1], out._val[0]);
521 Cast::act(in._val[2], in._val[3], out._val[1]);
522 Cast::act(in._val[4], in._val[5], out._val[2]);
523 Cast::act(in._val[6], in._val[7], out._val[3]);
530 act(
const BasePack<ePackType::ePT_Avx512, ipReal64>& in,
531 BasePack<ePackType::ePT_Avx512, TOut>& out)
533 typedef CastReg<eInstructionSet::eIS_Avx512, ipReal64, TOut> Cast;
534 Cast::act(in._val[0], in._val[1], out._val[0]);
535 Cast::act(in._val[2], in._val[3], out._val[1]);
536 Cast::act(in._val[4], in._val[5], out._val[2]);
537 Cast::act(in._val[6], in._val[7], out._val[3]);
541 template <
typename TOut>
543 typename boost::enable_if_c<
545 boost::is_integral<TOut>::value
549 static IPSDK_FORCEINLINE
550 BasePack<ePackType::ePT_Avx512, TOut>
551 act(
const BasePack<ePackType::ePT_Avx512, ipReal64>& in)
553 BasePack<ePackType::ePT_Avx512, TOut> out;
554 BasePack<ePackType::ePT_Avx512, ipInt32> inInt32;
556 CastPack<eInstructionSet::eIS_Avx512, ipReal64, ipInt32>::act(in, inInt32);
557 CastPack<eInstructionSet::eIS_Avx512, ipInt32, TOut>::act(inInt32, out);
564 act(
const BasePack<ePackType::ePT_Avx512, ipReal64>& in,
565 BasePack<ePackType::ePT_Avx512, TOut>& out)
567 BasePack<ePackType::ePT_Avx512, ipInt32> inInt32;
569 CastPack<eInstructionSet::eIS_Avx512, ipReal64, ipInt32>::act(in, inInt32);
570 CastPack<eInstructionSet::eIS_Avx512, ipInt32, TOut>::act(inInt32, out);
581 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_CASTPACK_H__
Defines the IPSDK_FORCEINLINE macro.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Definition: CastPack.h:33
structure containing a set of masks for vectorized operations
Definition: BaseMaskPackDecl.h:29
Definition: AllBitsToOneReg.h:33
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
utility functions for array operations
(including foundation and byte and word instructions)
Definition: InstructionSetTypes.h:51
BasePack class; defines a set of scalars (for instruction set "standard") or registers (for all other...
template structures used for immediate type promotion
eInstructionSet
Enumeration describing the processor instruction set.
Definition: InstructionSetTypes.h:31
template structure UpperType<typename T>; its typedef Type gives the type immediately above T...
Definition: UpperType.h:42
Definition of import/export macro for library.
Definition: AllBitsToZeroReg.h:31
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
Definition: MaskBlendReg.h:30
structure used to retrieve the AVX512 type associated with a base type
Definition: Avx512Types.h:36
structure containing intrinsic registers used to store vectorized data
Definition: BasePackDecl.h:29