15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_CASTPACK_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_CASTPACK_H__ 27 #include <boost/type_traits/is_signed.hpp> 36 template <
typename TIn,
typename TOut>
38 typename boost::enable_if_c<sizeof(TIn)==1 && sizeof(TOut)==1>::type>
68 template <
typename TIn,
typename TOut>
70 typename boost::enable_if_c<sizeof(TIn)==2 && sizeof(TOut)==2>::type>
103 template <
typename TIn,
typename TOut>
105 typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==4>::type>
144 template <
typename TIn,
typename TOut>
146 typename boost::enable_if_c<sizeof(TIn)==8 && sizeof(TOut)==8>::type>
197 template <
typename TIn,
typename TOut>
199 typename boost::enable_if_c<sizeof(TIn)==1 && sizeof(TOut)==2>::type>
201 static IPSDK_FORCEINLINE
220 template <
typename TIn,
typename TOut>
222 typename boost::enable_if_c<sizeof(TIn)==2 && sizeof(TOut)==4>::type>
224 static IPSDK_FORCEINLINE
245 template <
typename TIn,
typename TOut>
247 typename boost::enable_if_c<sizeof(TIn)==1 && sizeof(TOut)==4>::type>
249 static IPSDK_FORCEINLINE
271 in._val[0], in16._val[0], in16._val[1]);
274 in16._val[0], out._val[0], out._val[1]);
276 in16._val[1], out._val[2], out._val[3]);
280 template <
typename TIn,
typename TOut>
282 typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==8>::type>
284 static IPSDK_FORCEINLINE
290 Cast::act(in._val[0], out._val[0], out._val[1]);
291 Cast::act(in._val[1], out._val[2], out._val[3]);
292 Cast::act(in._val[2], out._val[4], out._val[5]);
293 Cast::act(in._val[3], out._val[6], out._val[7]);
304 Cast::act(in._val[0], out._val[0], out._val[1]);
305 Cast::act(in._val[1], out._val[2], out._val[3]);
306 Cast::act(in._val[2], out._val[4], out._val[5]);
307 Cast::act(in._val[3], out._val[6], out._val[7]);
311 template <
typename TIn,
typename TOut>
313 typename boost::enable_if_c<sizeof(TIn) <= 2 && sizeof(TOut)==8>::type>
315 static IPSDK_FORCEINLINE
331 CastPack<eInstructionSet::eIS_Avx2, TIn, ipInt32>::act(in, packInt32);
332 CastPack<eInstructionSet::eIS_Avx2, ipInt32, TOut>::act(packInt32, out);
336 template <
typename TIn,
typename TOut>
338 typename boost::enable_if_c<sizeof(TIn)==2 && sizeof(TOut)==1>::type>
340 static IPSDK_FORCEINLINE
341 BasePack<ePackType::ePT_Avx, TOut>
342 act(
const BasePack<ePackType::ePT_Avx, TIn>& in)
344 BasePack<ePackType::ePT_Avx, TOut> out;
345 CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(
346 in._val[0], in._val[1], out._val[0]);
353 act(
const BasePack<ePackType::ePT_Avx, TIn>& in,
354 BasePack<ePackType::ePT_Avx, TOut>& out)
356 CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(
357 in._val[0], in._val[1], out._val[0]);
361 template <
typename TIn,
typename TOut>
363 typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==2>::type>
365 static IPSDK_FORCEINLINE
366 BasePack<ePackType::ePT_Avx, TOut>
367 act(
const BasePack<ePackType::ePT_Avx, TIn>& in)
369 BasePack<ePackType::ePT_Avx, TOut> out;
370 CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(
371 in._val[0], in._val[1], out._val[0]);
372 CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(
373 in._val[2], in._val[3], out._val[1]);
380 act(
const BasePack<ePackType::ePT_Avx, TIn>& in,
381 BasePack<ePackType::ePT_Avx, TOut>& out)
383 CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(
384 in._val[0], in._val[1], out._val[0]);
385 CastReg<eInstructionSet::eIS_Avx2, TIn, TOut>::act(
386 in._val[2], in._val[3], out._val[1]);
390 template <
typename TIn,
typename TOut>
392 typename boost::enable_if_c<sizeof(TIn)==4 && sizeof(TOut)==1>::type>
394 static IPSDK_FORCEINLINE
395 BasePack<ePackType::ePT_Avx, TOut>
396 act(
const BasePack<ePackType::ePT_Avx, TIn>& in)
398 typedef typename UpperType<TOut>::Type T16;
399 BasePack<ePackType::ePT_Avx, T16> in16;
400 CastPack<eInstructionSet::eIS_Avx2, TIn, T16>::act(in, in16);
401 BasePack<ePackType::ePT_Avx, TOut> out;
402 CastReg<eInstructionSet::eIS_Avx2, T16, TOut>::act(
403 in16._val[0], in16._val[1], out._val[0]);
410 act(
const BasePack<ePackType::ePT_Avx, TIn>& in,
411 BasePack<ePackType::ePT_Avx, TOut>& out)
413 typedef typename UpperType<TOut>::Type T16;
414 BasePack<ePackType::ePT_Avx, T16> in16;
415 CastPack<eInstructionSet::eIS_Avx2, TIn, T16>::act(in, in16);
416 CastReg<eInstructionSet::eIS_Avx2, T16, TOut>::act(
417 in16._val[0], in16._val[1], out._val[0]);
446 static IPSDK_FORCEINLINE
447 BasePack<ePackType::ePT_Avx, ipReal32>
448 act(
const BasePack<ePackType::ePT_Avx, ipReal64>& in)
450 BasePack<ePackType::ePT_Avx, ipReal32> out;
451 typedef CastReg<eInstructionSet::eIS_Avx2, ipReal64, ipReal32> Cast;
453 Cast::act(in._val[0], in._val[1], out._val[0]);
454 Cast::act(in._val[2], in._val[3], out._val[1]);
455 Cast::act(in._val[4], in._val[5], out._val[2]);
456 Cast::act(in._val[6], in._val[7], out._val[3]);
463 act(
const BasePack<ePackType::ePT_Avx, ipReal64>& in,
464 BasePack<ePackType::ePT_Avx, ipReal32>& out)
466 typedef CastReg<eInstructionSet::eIS_Avx2, ipReal64, ipReal32> Cast;
467 Cast::act(in._val[0], in._val[1], out._val[0]);
468 Cast::act(in._val[2], in._val[3], out._val[1]);
469 Cast::act(in._val[4], in._val[5], out._val[2]);
470 Cast::act(in._val[6], in._val[7], out._val[3]);
474 template <
typename TOut>
476 typename boost::enable_if_c<
478 boost::is_integral<TOut>::value
482 static IPSDK_FORCEINLINE
483 BasePack<ePackType::ePT_Avx, TOut>
484 act(
const BasePack<ePackType::ePT_Avx, ipReal64>& in)
486 BasePack<ePackType::ePT_Avx, TOut> out;
487 typedef CastReg<eInstructionSet::eIS_Avx2, ipReal64, TOut> Cast;
489 Cast::act(in._val[0], in._val[1], out._val[0]);
490 Cast::act(in._val[2], in._val[3], out._val[1]);
491 Cast::act(in._val[4], in._val[5], out._val[2]);
492 Cast::act(in._val[6], in._val[7], out._val[3]);
499 act(
const BasePack<ePackType::ePT_Avx, ipReal64>& in,
500 BasePack<ePackType::ePT_Avx, TOut>& out)
502 typedef CastReg<eInstructionSet::eIS_Avx2, ipReal64, TOut> Cast;
503 Cast::act(in._val[0], in._val[1], out._val[0]);
504 Cast::act(in._val[2], in._val[3], out._val[1]);
505 Cast::act(in._val[4], in._val[5], out._val[2]);
506 Cast::act(in._val[6], in._val[7], out._val[3]);
510 template <
typename TOut>
512 typename boost::enable_if_c<
514 boost::is_integral<TOut>::value
518 static IPSDK_FORCEINLINE
519 BasePack<ePackType::ePT_Avx, TOut>
520 act(
const BasePack<ePackType::ePT_Avx, ipReal64>& in)
522 BasePack<ePackType::ePT_Avx, TOut> out;
523 BasePack<ePackType::ePT_Avx, ipInt32> inInt32;
525 CastPack<eInstructionSet::eIS_Avx2, ipReal64, ipInt32>::act(in, inInt32);
526 CastPack<eInstructionSet::eIS_Avx2, ipInt32, TOut>::act(inInt32, out);
533 act(
const BasePack<ePackType::ePT_Avx, ipReal64>& in,
534 BasePack<ePackType::ePT_Avx, TOut>& out)
536 BasePack<ePackType::ePT_Avx, ipInt32> inInt32;
538 CastPack<eInstructionSet::eIS_Avx2, ipReal64, ipInt32>::act(in, inInt32);
539 CastPack<eInstructionSet::eIS_Avx2, ipInt32, TOut>::act(inInt32, out);
550 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_CASTPACK_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Definition: CastPack.h:33
structure containing set of masks for vectorized operations
Definition: BaseMaskPackDecl.h:29
BaseMaskPack class; defines a set of masks; the number of masks in this set depends on the type of th...
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
utility functions for array operations
BasePack class; defines a set of scalars (for instruction set "standard") or registers (for all other...
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
template structure UpperType<typename T>; its typedef Type gives the type just upper to T...
Definition: UpperType.h:42
Advanced Vector Extensions 2.
Definition: InstructionSetTypes.h:48
Definition of import/export macro for library.
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
structure containing intrinsic registers used to store vectorized data
Definition: BasePackDecl.h:29