15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_GATHERREGIMPL_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_GATHERREGIMPL_H__ 24 #include <boost/mpl/and.hpp> 25 #include <boost/type_traits/is_same.hpp> 26 #include <boost/type_traits/is_signed.hpp> 39 typename boost::enable_if<
40 typename boost::mpl::and_<typename boost::is_integral<T>::type,
41 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<1> >::type
45 ::act(
const T* baseAddress,
47 typename Avx512Type<T>::Type& out)
51 contiguous[i] = *(baseAddress + indexes[i]);
53 LoadReg<eInstructionSet::eIS_Avx512, T>::act(out, contiguous);
60 typename boost::enable_if<
61 typename boost::mpl::and_<typename boost::is_integral<T>::type,
62 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<1> >::type
66 ::act(
const T* baseAddress,
67 const Avx512Type<ipUInt32>::Type& regIdx1,
68 const Avx512Type<ipUInt32>::Type& regIdx2,
69 const Avx512Type<ipUInt32>::Type& regIdx3,
70 const Avx512Type<ipUInt32>::Type& regIdx4,
71 typename Avx512Type<T>::Type& out)
74 UnloadReg<eInstructionSet::eIS_Avx512, ipUInt32>::act(regIdx1, indexes);
75 UnloadReg<eInstructionSet::eIS_Avx512, ipUInt32>::act(regIdx2, indexes+16);
76 UnloadReg<eInstructionSet::eIS_Avx512, ipUInt32>::act(regIdx3, indexes+32);
77 UnloadReg<eInstructionSet::eIS_Avx512, ipUInt32>::act(regIdx4, indexes+48);
81 contiguous[i] = *(baseAddress + indexes[i]);
83 LoadReg<eInstructionSet::eIS_Avx512, T>::act(out, contiguous);
90 typename boost::enable_if<
91 typename boost::mpl::and_<typename boost::is_integral<T>::type,
92 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<2> >::type
96 ::act(
const T* baseAddress,
98 typename Avx512Type<T>::Type& out)
102 contiguous[i] = *(baseAddress + indexes[i]);
104 LoadReg<eInstructionSet::eIS_Avx512, T>::act(out, contiguous);
107 template <
typename T>
111 typename boost::enable_if<
112 typename boost::mpl::and_<typename boost::is_integral<T>::type,
113 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<2> >::type
117 ::act(
const T* baseAddress,
118 const Avx512Type<ipUInt32>::Type& regIdx1,
119 const Avx512Type<ipUInt32>::Type& regIdx2,
120 typename Avx512Type<T>::Type& out)
123 UnloadReg<eInstructionSet::eIS_Avx512, ipUInt32>::act(regIdx1, indexes);
124 UnloadReg<eInstructionSet::eIS_Avx512, ipUInt32>::act(regIdx2, indexes+16);
128 contiguous[i] = *(baseAddress + indexes[i]);
130 LoadReg<eInstructionSet::eIS_Avx512, T>::act(out, contiguous);
133 template <
typename T>
137 typename boost::enable_if<
138 typename boost::mpl::and_<typename boost::is_integral<T>::type,
139 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
143 ::act(
const T* baseAddress,
const ipUInt32* indexes,
144 typename Avx512Type<T>::Type& out)
146 out = _mm512_set_epi32(
147 baseAddress[indexes[15]],
148 baseAddress[indexes[14]],
149 baseAddress[indexes[13]],
150 baseAddress[indexes[12]],
151 baseAddress[indexes[11]],
152 baseAddress[indexes[10]],
153 baseAddress[indexes[9]],
154 baseAddress[indexes[8]],
155 baseAddress[indexes[7]],
156 baseAddress[indexes[6]],
157 baseAddress[indexes[5]],
158 baseAddress[indexes[4]],
159 baseAddress[indexes[3]],
160 baseAddress[indexes[2]],
161 baseAddress[indexes[1]],
162 baseAddress[indexes[0]]);
165 template <
typename T>
169 typename boost::enable_if<
170 typename boost::mpl::and_<typename boost::is_integral<T>::type,
171 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
175 ::act(
const T* baseAddress,
const Avx512Type<ipUInt32>::Type& regIdx,
176 typename Avx512Type<T>::Type& out)
178 const int* baseAddrCvt =
reinterpret_cast<const int*
>(baseAddress);
179 const Avx512Type<ipUInt32>::Type regIdxLocal = regIdx;
180 out = _mm512_i32gather_epi32(regIdxLocal, baseAddrCvt, 4);
185 GatherReg<eInstructionSet::eIS_Avx512, ipReal32>::act(
187 Avx512Type<ipReal32>::Type& out)
190 baseAddress[indexes[15]],
191 baseAddress[indexes[14]],
192 baseAddress[indexes[13]],
193 baseAddress[indexes[12]],
194 baseAddress[indexes[11]],
195 baseAddress[indexes[10]],
196 baseAddress[indexes[9]],
197 baseAddress[indexes[8]],
198 baseAddress[indexes[7]],
199 baseAddress[indexes[6]],
200 baseAddress[indexes[5]],
201 baseAddress[indexes[4]],
202 baseAddress[indexes[3]],
203 baseAddress[indexes[2]],
204 baseAddress[indexes[1]],
205 baseAddress[indexes[0]]);
211 GatherReg<eInstructionSet::eIS_Avx512, ipReal32>::act(
212 const ipReal32* baseAddress,
const Avx512Type<ipUInt32>::Type& regIdx,
213 Avx512Type<ipReal32>::Type& out)
215 const Avx512Type<ipUInt32>::Type regIdxLocal = regIdx;
216 out = _mm512_i32gather_ps(regIdxLocal, baseAddress, 4);
221 GatherReg<eInstructionSet::eIS_Avx512, ipReal64>::act(
223 Avx512Type<ipReal64>::Type& out)
226 baseAddress[indexes[7]],
227 baseAddress[indexes[6]],
228 baseAddress[indexes[5]],
229 baseAddress[indexes[4]],
230 baseAddress[indexes[3]],
231 baseAddress[indexes[2]],
232 baseAddress[indexes[1]],
233 baseAddress[indexes[0]]);
239 GatherReg<eInstructionSet::eIS_Avx512, ipReal64>::act(
240 const ipReal64* baseAddress,
const __m256i& regIdx,
241 Avx512Type<ipReal64>::Type& out)
243 const __m256i regIdxLocal = regIdx;
244 out = _mm512_i32gather_pd(regIdxLocal, baseAddress, 8);
254 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_GATHERREGIMPL_H__ Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
(including foundation and byte and word instructions)
Definition: InstructionSetTypes.h:51
Definition of import/export macro for library.
unload function; unloads a pack into a memory buffer
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53