// Header guard and Boost includes, followed by the ScatterReg::act overload that
// is enabled (via boost::enable_if) when T is an integral type with sizeof(T) == 1.
// The overload writes each lane of the register `in` to outBaseAddr[indexes[i]].
// NOTE(review): the template header, remaining parameters, the `buffer`
// declaration and the loop over `i` are not visible here - confirm against the
// full header before relying on details.
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_SCATTERREGIMPL_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_SCATTERREGIMPL_H__ 23 #include <boost/mpl/and.hpp> 24 #include <boost/type_traits/is_same.hpp> 25 #include <boost/type_traits/is_signed.hpp> 38 typename boost::enable_if<
39 typename boost::mpl::and_<typename boost::is_integral<T>::type,
40 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<1> >::type
44 ::act(
const typename AvxType<T>::Type& in,
// Spill the whole register to `buffer` with an unaligned 256-bit store so the
// lanes can be read back as scalars.
49 _mm256_storeu_si256(
reinterpret_cast<typename AvxType<T>::Type*
>(buffer), in);
// Scalar scatter: lane i goes to the destination slot named by indexes[i].
// presumably repeated for all 32 one-byte lanes - TODO confirm
85 outBaseAddr[indexes[i]] = buffer[i];
// ScatterReg::act overload for integral T with sizeof(T) == 1 that takes the
// destination indexes as four ipUInt32 registers instead of a memory array.
//   in          : register holding the values to scatter
//   regIdx1..4  : registers holding the destination indexes
//   outBaseAddr : base address of the output; writes land at outBaseAddr[index]
// NOTE(review): the declaration of the local `indexes` array is not visible here;
// the offsets below imply it holds at least 32 ipUInt32 values - confirm.
92 typename boost::enable_if<
93 typename boost::mpl::and_<typename boost::is_integral<T>::type,
94 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<1> >::type
98 ::act(
const typename AvxType<T>::Type& in,
99 const AvxType<ipUInt32>::Type& regIdx1,
100 const AvxType<ipUInt32>::Type& regIdx2,
101 const AvxType<ipUInt32>::Type& regIdx3,
102 const AvxType<ipUInt32>::Type& regIdx4,
103 T*
const outBaseAddr)
// Unload each index register into consecutive 8-element slices of `indexes`
// (offsets 0, 8, 16 and 24).
106 UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx1, indexes);
107 UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx2, indexes+8);
108 UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx3, indexes+16);
109 UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx4, indexes+24);
// Delegate to the overload that takes the indexes as a memory array.
111 ScatterReg<eInstructionSet::eIS_Avx2, T>::act(in, indexes, outBaseAddr);
// ScatterReg::act overload enabled when T is an integral type with
// sizeof(T) == 2. Writes each lane of `in` to outBaseAddr[indexes[i]].
// NOTE(review): the `indexes` parameter, the `buffer` declaration and the loop
// over `i` are not visible here - confirm against the full header.
114 template <
typename T>
118 typename boost::enable_if<
119 typename boost::mpl::and_<typename boost::is_integral<T>::type,
120 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<2> >::type
124 ::act(
const typename AvxType<T>::Type& in,
126 T*
const outBaseAddr)
// Spill the register to `buffer` with an unaligned 256-bit store.
129 _mm256_storeu_si256(
reinterpret_cast<typename AvxType<T>::Type*
>(buffer), in);
// Scalar scatter of lane i to the slot named by indexes[i].
// presumably repeated for all 16 two-byte lanes - TODO confirm
148 outBaseAddr[indexes[i]] = buffer[i];
// ScatterReg::act overload for integral T with sizeof(T) == 2 that takes the
// destination indexes as two ipUInt32 registers instead of a memory array.
//   in          : register holding the values to scatter
//   regIdx1..2  : registers holding the destination indexes
//   outBaseAddr : base address of the output; writes land at outBaseAddr[index]
// NOTE(review): the declaration of the local `indexes` array is not visible here;
// the offsets below imply it holds at least 16 ipUInt32 values - confirm.
151 template <
typename T>
155 typename boost::enable_if<
156 typename boost::mpl::and_<typename boost::is_integral<T>::type,
157 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<2> >::type
161 ::act(
const typename AvxType<T>::Type& in,
162 const AvxType<ipUInt32>::Type& regIdx1,
163 const AvxType<ipUInt32>::Type& regIdx2,
164 T*
const outBaseAddr)
// Unload both index registers into `indexes` (offsets 0 and 8).
167 UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx1, indexes);
168 UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx2, indexes+8);
// Delegate to the overload that takes the indexes as a memory array.
170 ScatterReg<eInstructionSet::eIS_Avx2, T>::act(in, indexes, outBaseAddr);
// ScatterReg::act overload enabled when T is an integral type with
// sizeof(T) == 4.
//   in          : register holding the values to scatter
//   indexes     : destination indexes, read as indexes[i] for lane i
//   outBaseAddr : base address of the output; writes land at outBaseAddr[indexes[i]]
// NOTE(review): the `buffer` declaration and the loop over `i` are not visible
// here - confirm against the full header.
173 template <
typename T>
177 typename boost::enable_if<
178 typename boost::mpl::and_<typename boost::is_integral<T>::type,
179 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
183 ::act(
const typename AvxType<T>::Type& in,
const ipUInt32* indexes,
184 T*
const outBaseAddr)
// Spill the register to `buffer` with an unaligned 256-bit store.
187 _mm256_storeu_si256(
reinterpret_cast<typename AvxType<T>::Type*
>(buffer), in);
// Scalar scatter of lane i to the slot named by indexes[i].
// presumably repeated for all 8 four-byte lanes - TODO confirm
198 outBaseAddr[indexes[i]] = buffer[i];
// ScatterReg::act overload for integral T with sizeof(T) == 4 that takes the
// destination indexes as a single ipUInt32 register instead of a memory array.
//   in          : register holding the values to scatter
//   regIdx      : register holding the destination indexes
//   outBaseAddr : base address of the output; writes land at outBaseAddr[index]
// NOTE(review): the declaration of the local `indexes` array is not visible
// here - confirm its size against the full header.
201 template <
typename T>
205 typename boost::enable_if<
206 typename boost::mpl::and_<typename boost::is_integral<T>::type,
207 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
212 const typename AvxType<T>::Type& in,
213 const AvxType<ipUInt32>::Type& regIdx,
214 T*
const outBaseAddr)
// Unload the index register into `indexes`.
217 UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx, indexes);
// Delegate to the overload that takes the indexes as a memory array.
219 ScatterReg<eInstructionSet::eIS_Avx2, T>::act(in, indexes, outBaseAddr);
// ScatterReg specialisation for ipReal32 (float): writes each lane of `in` to
// outBaseAddr[indexes[i]].
// NOTE(review): remaining parameters, the `buffer` declaration and the loop over
// `i` are not visible here - confirm against the full header.
225 ScatterReg<eInstructionSet::eIS_Avx2, ipReal32>::act(
226 const AvxType<ipReal32>::Type& in,
// Spill the register to `buffer` with an unaligned single-precision store.
231 _mm256_storeu_ps(buffer, in);
// Scalar scatter of lane i to the slot named by indexes[i].
// presumably repeated for all 8 float lanes - TODO confirm
242 outBaseAddr[indexes[i]] = buffer[i];
// ScatterReg specialisation for ipReal32 that takes the destination indexes as
// an ipUInt32 register instead of a memory array.
// NOTE(review): the outBaseAddr parameter and the declaration of the local
// `indexes` array are not visible here - confirm against the full header.
248 ScatterReg<eInstructionSet::eIS_Avx2, ipReal32>::act(
249 const AvxType<ipReal32>::Type& in,
250 const AvxType<ipUInt32>::Type& regIdx,
// Unload the index register into `indexes`, then delegate to the overload that
// takes the indexes as a memory array.
254 UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx, indexes);
255 ScatterReg<eInstructionSet::eIS_Avx2, ipReal32>::act(in, indexes, outBaseAddr);
// ScatterReg specialisation for ipReal64 (double): writes each lane of `in` to
// outBaseAddr[indexes[i]].
// NOTE(review): remaining parameters, the `buffer` declaration and the loop over
// `i` are not visible here - confirm against the full header.
261 ScatterReg<eInstructionSet::eIS_Avx2, ipReal64>::act(
262 const AvxType<ipReal64>::Type& in,
// Spill the register to `buffer` with an unaligned double-precision store.
267 _mm256_storeu_pd(buffer, in);
// Scalar scatter of lane i to the slot named by indexes[i].
// presumably repeated for all 4 double lanes - TODO confirm
274 outBaseAddr[indexes[i]] = buffer[i];
284 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_SCATTERREGIMPL_H__ Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
uint8_t ipUInt8
Base types definition.
Definition: BaseTypes.h:49
Advanced Vector Extensions 2.
Definition: InstructionSetTypes.h:48
Definition of the import/export macro for the library.
Unload function: unloads a pack into a memory buffer.
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53