15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_REDUCTION_DETAIL_SSE2_SUMPACK_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_REDUCTION_DETAIL_SSE2_SUMPACK_H__ 36 typename boost::enable_if_c<
37 boost::is_integral<T>::value
42 static IPSDK_FORCEINLINE
49 cast<eInstructionSet::eIS_Sse2, typename UpperType<T>::Type>(in);
51 p16._val[0] = AddR::act(p16._val[0], p16._val[1]);
52 p16._val[0] = AddR::act(p16._val[0], _mm_srli_si128(p16._val[0], 8));
53 p16._val[0] = AddR::act(p16._val[0], _mm_srli_si128(p16._val[0], 4));
54 p16._val[0] = AddR::act(p16._val[0], _mm_srli_si128(p16._val[0], 2));
57 _mm_extract_epi16(p16._val[0], 0));
65 typename boost::enable_if_c<
66 boost::is_integral<T>::value
71 static IPSDK_FORCEINLINE
79 cast<eInstructionSet::eIS_Sse2, typename UpperType<T>::Type>(in);
81 in32._val[0] = AddR::act(in32._val[0], in32._val[1]);
82 in32._val[0] = AddR::act(in32._val[0], in32._val[2]);
83 in32._val[0] = AddR::act(in32._val[0], in32._val[3]);
84 in32._val[0] = AddR::act(in32._val[0], _mm_srli_si128(in32._val[0], 16));
85 in32._val[0] = AddR::act(in32._val[0], _mm_srli_si128(in32._val[0], 8));
86 in32._val[0] = AddR::act(in32._val[0], _mm_srli_si128(in32._val[0], 4));
100 typename boost::enable_if_c<boost::is_integral<T>::value
101 && sizeof(T) == 4>::type>
103 static IPSDK_FORCEINLINE
114 template <
typename T>
116 typename boost::enable_if_c<boost::is_integral<T>::value
117 && sizeof(T) == 8>::type>
119 static IPSDK_FORCEINLINE
124 res._val[0] = _mm_add_epi64(in._val[0], in._val[1]);
125 res._val[0] = _mm_add_epi64(res._val[0], in._val[2]);
126 res._val[0] = _mm_add_epi64(res._val[0], in._val[3]);
127 res._val[0] = _mm_add_epi64(res._val[0], in._val[4]);
128 res._val[0] = _mm_add_epi64(res._val[0], in._val[5]);
129 res._val[0] = _mm_add_epi64(res._val[0], in._val[6]);
130 res._val[0] = _mm_add_epi64(res._val[0], in._val[7]);
133 _mm_add_epi64(res._val[0], _mm_srli_si128(res._val[0], 32));
135 _mm_add_epi64(res._val[0], _mm_srli_si128(res._val[0], 16));
137 _mm_add_epi64(res._val[0], _mm_srli_si128(res._val[0], 8));
152 static IPSDK_FORCEINLINE
157 res._val[0] = _mm_add_pd(in._val[0], in._val[1]);
158 res._val[0] = _mm_add_pd(res._val[0], in._val[2]);
159 res._val[0] = _mm_add_pd(res._val[0], in._val[3]);
160 res._val[0] = _mm_add_pd(res._val[0], in._val[4]);
161 res._val[0] = _mm_add_pd(res._val[0], in._val[5]);
162 res._val[0] = _mm_add_pd(res._val[0], in._val[6]);
163 res._val[0] = _mm_add_pd(res._val[0], in._val[7]);
166 _mm_storeu_pd(tab, res._val[0]);
168 return tab[0] + tab[1];
177 static IPSDK_FORCEINLINE
182 cast<eInstructionSet::eIS_Sse2, ipReal64>(in));
194 #endif // __IPSDKUTIL_INSTRUCTIONSET_REDUCTION_DETAIL_SSE2_SUMPACK_H__
Definition: BasePack.h:37
Defines the IPSDK_FORCEINLINE macro.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
cast function; casts a Pack<instructionSet, TIn> to a Pack<instructionSet, TOut>
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
Specialization of the ipsdk::simd::detail::AddReg structure for the SSE2 instruction set
eInstructionSet
Enumeration describing processor instruction sets.
Definition: InstructionSetTypes.h:31
Template structure UpperType<typename T>; its typedef Type gives the next wider type above T...
Definition: UpperType.h:42
structure SumPack<eInstructionSet::domain instructionSet, typename T, typename Enable=void> ...
Definition: SumPack.h:40
IPSDK_FORCEINLINE ipsdk::simd::IS2Pack< instructionSet, TOut >::Type cast(const PackIn &in)
casts from PackIn type to PackOut type
Definition: cast.h:34
Definition of import/export macro for library.
Template structure which is specialized to implement arithmetic addition on 2 scalars or 2 registers...
Definition: AddReg.h:37
Streaming SIMD Extensions 2.
Definition: InstructionSetTypes.h:36
Definition: UnloadReg.h:30
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
structure containing intrinsic registers used to store vectorized data
Definition: BasePackDecl.h:29