// NOTE(review): this listing carries the original file's line numbers inside
// the text and omits intermediate lines (signature, braces, declaration of
// `tab`). The comments below describe only the statements that are visible.
//
// Include guard, followed by the enable_if condition that selects this
// specialization for integral T with sizeof(T) == 1.
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_REDUCTION_DETAIL_AVX2_SUMPACK_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_REDUCTION_DETAIL_AVX2_SUMPACK_H__ 35 typename boost::enable_if_c<boost::is_integral<T>::value
36 && sizeof(T) == 1>::type>
38 static IPSDK_FORCEINLINE
// Convert the input pack to elements of UpperType<T>::Type before summing.
// Presumably these are 16-bit elements (name `in16`, epi16 intrinsics below)
// - confirm against UpperType.
44 Pack16 in16 = cast<eInstructionSet::eIS_Avx2, typename UpperType<T>::Type>(in);
// Fold register 1 of the converted pack into register 0 via AddR::act
// (presumably an element-wise register addition - confirm against AddReg).
46 in16._val[0] = AddR::act(in16._val[0], in16._val[1]);
// Three horizontal-add passes with the register used as both operands.
// _mm256_hadd_epi16 adds adjacent 16-bit pairs independently inside each
// 128-bit lane, so after three passes every 16-bit element of a lane holds
// the sum of that lane's eight elements.
47 in16._val[0] = _mm256_hadd_epi16(in16._val[0], in16._val[0]);
48 in16._val[0] = _mm256_hadd_epi16(in16._val[0], in16._val[0]);
49 in16._val[0] = _mm256_hadd_epi16(in16._val[0], in16._val[0]);
// Unaligned store of the reduced register into `tab` (declared on a line that
// is not part of this listing); the pointer is reinterpreted as
// AvxType<T>::Type* for the store intrinsic.
52 _mm256_storeu_si256(
reinterpret_cast<typename
AvxType<T>::Type*
>(tab), in16._val[0]);
// enable_if condition selecting this specialization for integral T with
// sizeof(T) == 2. The signature and braces are not part of this listing.
61 typename boost::enable_if_c<boost::is_integral<T>::value
62 && sizeof(T) == 2>::type>
64 static IPSDK_FORCEINLINE
// Convert the input pack to elements of UpperType<T>::Type. The left-hand
// side of this statement is on a line that is not shown; presumably it
// initializes `in32`, which is used below.
72 cast<eInstructionSet::eIS_Avx2, typename UpperType<T>::Type>(in);
// Accumulate registers 1..3 of the converted pack into register 0 via
// AddR::act (presumably an element-wise register addition - confirm).
74 in32._val[0] = AddR::act(in32._val[0], in32._val[1]);
75 in32._val[0] = AddR::act(in32._val[0], in32._val[2]);
76 in32._val[0] = AddR::act(in32._val[0], in32._val[3]);
// Two horizontal-add passes with the register used as both operands.
// _mm256_hadd_epi32 adds adjacent 32-bit pairs independently inside each
// 128-bit lane, so after two passes every 32-bit element of a lane holds the
// sum of that lane's four elements.
78 in32._val[0] = _mm256_hadd_epi32(in32._val[0], in32._val[0]);
79 in32._val[0] = _mm256_hadd_epi32(in32._val[0], in32._val[0]);
// enable_if condition selecting this specialization for integral T with
// sizeof(T) == 4. The function signature and body are not part of this
// listing.
93 typename boost::enable_if_c<boost::is_integral<T>::value
94 && sizeof(T) == 4>::type>
96 static IPSDK_FORCEINLINE
// Specialization header for integral T with sizeof(T) == 8. The signature,
// braces and the declarations of `res` and `tab` are not part of this listing.
107 template <
typename T>
109 typename boost::enable_if_c<boost::is_integral<T>::value
110 && sizeof(T) == 8>::type>
112 static IPSDK_FORCEINLINE
// Element-wise 64-bit addition of the eight input registers in._val[0..7],
// accumulated into res._val[0].
118 res._val[0] = _mm256_add_epi64(in._val[0], in._val[1]);
119 res._val[0] = _mm256_add_epi64(res._val[0], in._val[2]);
120 res._val[0] = _mm256_add_epi64(res._val[0], in._val[3]);
121 res._val[0] = _mm256_add_epi64(res._val[0], in._val[4]);
122 res._val[0] = _mm256_add_epi64(res._val[0], in._val[5]);
123 res._val[0] = _mm256_add_epi64(res._val[0], in._val[6]);
124 res._val[0] = _mm256_add_epi64(res._val[0], in._val[7]);
// _mm256_srli_si256 shifts each 128-bit lane right by 8 bytes, moving the
// upper 64-bit element of a lane onto the lower one; the addition therefore
// leaves the per-lane sum in 64-bit elements 0 and 2. The assignment target
// of this expression is on a line that is not shown.
127 _mm256_add_epi64(res._val[0], _mm256_srli_si256(res._val[0], 8));
// Adds elements 0 and 2 of `tab`. Presumably `tab` received the register
// computed above, so these are the two per-lane sums - confirm against the
// missing store line.
133 return tab[0] + tab[2];
// Fragment of a specialization working on packed doubles (the template header
// and signature are not part of this listing; presumably the ipReal64 case).
142 static IPSDK_FORCEINLINE
// Element-wise double-precision addition of the eight input registers
// in._val[0..7], accumulated into res._val[0].
147 res._val[0] = _mm256_add_pd(in._val[0], in._val[1]);
148 res._val[0] = _mm256_add_pd(res._val[0], in._val[2]);
149 res._val[0] = _mm256_add_pd(res._val[0], in._val[3]);
150 res._val[0] = _mm256_add_pd(res._val[0], in._val[4]);
151 res._val[0] = _mm256_add_pd(res._val[0], in._val[5]);
152 res._val[0] = _mm256_add_pd(res._val[0], in._val[6]);
153 res._val[0] = _mm256_add_pd(res._val[0], in._val[7]);
// Unaligned store of the four accumulated doubles into `tab` (declared on a
// line that is not shown).
156 _mm256_storeu_pd(tab, res._val[0]);
// Finish the reduction in scalar code by adding the four stored values.
158 return tab[0] + tab[1] + tab[2] + tab[3];
// Fragment of a further specialization; its template header and signature are
// not part of this listing.
167 static IPSDK_FORCEINLINE
// Converts the input pack to ipReal64 elements and passes the result as the
// argument of a call whose beginning is on a line that is not shown
// (presumably the ipReal64 summation - confirm).
172 cast<eInstructionSet::eIS_Avx2, ipReal64>(in));
184 #endif // __IPSDKUTIL_INSTRUCTIONSET_REDUCTION_DETAIL_AVX2_SUMPACK_H__ Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
cast function; casts a Pack<instructionSet, TIn> to a Pack<instructionSet, TOut>
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
int32_t ipInt32
Base types definition.
Definition: BaseTypes.h:52
int16_t ipInt16
Base types definition.
Definition: BaseTypes.h:50
eInstructionSet
Enumeration describing the processor instruction set.
Definition: InstructionSetTypes.h:31
Template structure UpperType<typename T>; its typedef Type gives the type just above T...
Definition: UpperType.h:42
Advanced Vector Extensions 2.
Definition: InstructionSetTypes.h:48
structure SumPack<eInstructionSet::domain instructionSet, typename T, typename Enable=void> ...
Definition: SumPack.h:40
IPSDK_FORCEINLINE ipsdk::simd::IS2Pack< instructionSet, TOut >::Type cast(const PackIn &in)
casts from PackIn type to PackOut type
Definition: cast.h:34
Definition of import/export macro for library.
template structure which is specialized to implement the arithmetic addition on 2 scalars or 2 regist...
Definition: AddReg.h:37
specialization of ipsdk::simd::detail::AddReg structure for AVX2 instruction set
int64_t ipInt64
Base types definition.
Definition: BaseTypes.h:54
structure used to retrieve AVX type associated to a base type
Definition: AvxTypes.h:33
Definition: BasePack.h:37
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
structure containing intrinsic registers used to store vectorized data
Definition: BasePackDecl.h:29