15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_GATHERREGIMPL_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_GATHERREGIMPL_H__ 24 #include <boost/mpl/and.hpp> 25 #include <boost/type_traits/is_same.hpp> 26 #include <boost/type_traits/is_signed.hpp> 39 typename boost::enable_if<
// AVX2 gather, memory-index overload, enabled only when T is an integral type
// with sizeof(T) == 1 (see the two conditions combined by boost::mpl::and_ below).
// NOTE(review): this listing is lossy - the template header, the index
// parameter, the declaration of `contiguous` and the loop header are not
// visible here; confirm against the real header before relying on details.
40 typename boost::mpl::and_<typename boost::is_integral<T>::type,
41 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<1> >::type
45 ::act(
const T* baseAddress,
47 typename AvxType<T>::Type& out)
// Scalar gather: element i is read from baseAddress at offset indexes[i]
// (offset counted in elements of T) and stored into a contiguous scratch buffer.
51 contiguous[i] = *(baseAddress + indexes[i]);
// The scratch buffer is then loaded into the output register in one operation.
53 LoadReg<eInstructionSet::eIS_Avx2, T>::act(out, contiguous);
// AVX2 gather, register-index overload, enabled only when T is an integral
// type with sizeof(T) == 1. The element indexes arrive in four ipUInt32
// index registers (regIdx1..regIdx4).
// NOTE(review): this listing is lossy - the declarations of `indexes` and
// `contiguous` and the loop header are not visible here.
60 typename boost::enable_if<
61 typename boost::mpl::and_<typename boost::is_integral<T>::type,
62 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<1> >::type
66 ::act(
const T* baseAddress,
67 const AvxType<ipUInt32>::Type& regIdx1,
68 const AvxType<ipUInt32>::Type& regIdx2,
69 const AvxType<ipUInt32>::Type& regIdx3,
70 const AvxType<ipUInt32>::Type& regIdx4,
71 typename AvxType<T>::Type& out)
// Spill the four index registers into one memory buffer, 8 entries apart
// (offsets 0, 8, 16 and 24), so the indexes can be read as scalars.
74 UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx1, indexes);
75 UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx2, indexes+8);
76 UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx3, indexes+16);
77 UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx4, indexes+24);
// Scalar gather: element i is read from baseAddress at offset indexes[i]
// (offset counted in elements of T) and stored into a contiguous scratch buffer.
81 contiguous[i] = *(baseAddress + indexes[i]);
// The scratch buffer is then loaded into the output register in one operation.
83 LoadReg<eInstructionSet::eIS_Avx2, T>::act(out, contiguous);
// AVX2 gather, memory-index overload, enabled only when T is an integral type
// with sizeof(T) == 2.
// NOTE(review): this listing is lossy - the template header, the index
// parameter, the declaration of `contiguous` and the loop header are not
// visible here.
90 typename boost::enable_if<
91 typename boost::mpl::and_<typename boost::is_integral<T>::type,
92 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<2> >::type
96 ::act(
const T* baseAddress,
98 typename AvxType<T>::Type& out)
// Scalar gather: element i is read from baseAddress at offset indexes[i]
// (offset counted in elements of T) and stored into a contiguous scratch buffer.
102 contiguous[i] = *(baseAddress + indexes[i]);
// The scratch buffer is then loaded into the output register in one operation.
104 LoadReg<eInstructionSet::eIS_Avx2, T>::act(out, contiguous);
// AVX2 gather, register-index overload, enabled only when T is an integral
// type with sizeof(T) == 2. The element indexes arrive in two ipUInt32
// index registers (regIdx1, regIdx2).
// NOTE(review): this listing is lossy - the declarations of `indexes` and
// `contiguous` and the loop header are not visible here.
107 template <
typename T>
111 typename boost::enable_if<
112 typename boost::mpl::and_<typename boost::is_integral<T>::type,
113 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<2> >::type
117 ::act(
const T* baseAddress,
118 const AvxType<ipUInt32>::Type& regIdx1,
119 const AvxType<ipUInt32>::Type& regIdx2,
120 typename AvxType<T>::Type& out)
// Spill both index registers into one memory buffer, 8 entries apart
// (offsets 0 and 8), so the indexes can be read as scalars.
123 UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx1, indexes);
124 UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx2, indexes+8);
// Scalar gather: element i is read from baseAddress at offset indexes[i]
// (offset counted in elements of T) and stored into a contiguous scratch buffer.
128 contiguous[i] = *(baseAddress + indexes[i]);
// The scratch buffer is then loaded into the output register in one operation.
130 LoadReg<eInstructionSet::eIS_Avx2, T>::act(out, contiguous);
// AVX2 gather, memory-index overload, enabled only when T is an integral type
// with sizeof(T) == 4. Reads eight elements of baseAddress, selected by
// indexes[0..7], and packs them into `out`.
// The caller must provide at least eight valid entries in `indexes`; no bounds
// check is performed on either the index array or the indexed data.
133 template <
typename T>
137 typename boost::enable_if<
138 typename boost::mpl::and_<typename boost::is_integral<T>::type,
139 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
143 ::act(
const T* baseAddress,
const ipUInt32* indexes,
144 typename AvxType<T>::Type& out)
// _mm256_set_epi32 takes its arguments from the highest lane down to the
// lowest, so listing indexes[7] first places the element for indexes[0] in
// lane 0 of the register.
146 out = _mm256_set_epi32(
147 baseAddress[indexes[7]],
148 baseAddress[indexes[6]],
149 baseAddress[indexes[5]],
150 baseAddress[indexes[4]],
151 baseAddress[indexes[3]],
152 baseAddress[indexes[2]],
153 baseAddress[indexes[1]],
154 baseAddress[indexes[0]]);
// AVX2 gather, register-index overload, enabled only when T is an integral
// type with sizeof(T) == 4. Uses the hardware gather instruction instead of
// a scalar loop.
// NOTE(review): _mm256_i32gather_epi32 interprets each 32-bit index as a
// signed value, while the index register type here is built on ipUInt32;
// indexes above INT32_MAX would address memory before baseAddress - confirm
// callers never pass such values.
157 template <
typename T>
161 typename boost::enable_if<
162 typename boost::mpl::and_<typename boost::is_integral<T>::type,
163 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
167 ::act(
const T* baseAddress,
const AvxType<ipUInt32>::Type& regIdx,
168 typename AvxType<T>::Type& out)
// The intrinsic takes an `int const*` base address, so the typed pointer is
// reinterpreted; T is guaranteed to be a 4-byte integral type by the
// enable_if condition above.
170 const int* baseAddrCvt =
reinterpret_cast<const int*
>(baseAddress);
// A by-value copy of the index register is what gets passed to the intrinsic.
171 const AvxType<ipUInt32>::Type regIdxLocal = regIdx;
// Scale 4: each index is multiplied by 4 bytes, i.e. by sizeof(T).
172 out = _mm256_i32gather_epi32(baseAddrCvt, regIdxLocal, 4);
// AVX2 gather specialisation for ipReal32, memory-index overload: builds `out`
// from the eight elements of baseAddress selected by indexes[0..7].
// NOTE(review): this listing is lossy - the input parameters and the call that
// consumes the eight values below are not visible here (presumably
// `out = _mm256_set_ps(` - TODO confirm against the real header).
177 GatherReg<eInstructionSet::eIS_Avx2, ipReal32>::act(
179 AvxType<ipReal32>::Type& out)
// Values are listed from index 7 down to index 0.
182 baseAddress[indexes[7]],
183 baseAddress[indexes[6]],
184 baseAddress[indexes[5]],
185 baseAddress[indexes[4]],
186 baseAddress[indexes[3]],
187 baseAddress[indexes[2]],
188 baseAddress[indexes[1]],
189 baseAddress[indexes[0]]);
// AVX2 gather specialisation for ipReal32, register-index overload: uses the
// hardware gather instruction with the indexes held in `regIdx`.
// NOTE(review): _mm256_i32gather_ps interprets each 32-bit index as a signed
// value, while the index register type here is built on ipUInt32; indexes
// above INT32_MAX would address memory before baseAddress - confirm callers
// never pass such values.
195 GatherReg<eInstructionSet::eIS_Avx2, ipReal32>::act(
196 const ipReal32* baseAddress,
const AvxType<ipUInt32>::Type& regIdx,
197 AvxType<ipReal32>::Type& out)
// A by-value copy of the index register is what gets passed to the intrinsic.
199 const AvxType<ipUInt32>::Type regIdxLocal = regIdx;
// Scale 4: each index is multiplied by 4 bytes, the size of one ipReal32.
200 out = _mm256_i32gather_ps(baseAddress, regIdxLocal, 4);
// AVX2 gather specialisation for ipReal64, memory-index overload: builds `out`
// from the four elements of baseAddress selected by indexes[0..3].
// NOTE(review): this listing is lossy - the input parameters and the call that
// consumes the four values below are not visible here (presumably
// `out = _mm256_set_pd(` - TODO confirm against the real header).
205 GatherReg<eInstructionSet::eIS_Avx2, ipReal64>::act(
207 AvxType<ipReal64>::Type& out)
// Values are listed from index 3 down to index 0.
210 baseAddress[indexes[3]],
211 baseAddress[indexes[2]],
212 baseAddress[indexes[1]],
213 baseAddress[indexes[0]]);
// AVX2 gather specialisation for ipReal64, register-index overload: uses the
// hardware gather instruction. The indexes are held in a 128-bit register
// (`__m128i`), i.e. four 32-bit indexes for the four 64-bit output lanes.
// NOTE(review): _mm256_i32gather_pd interprets each 32-bit index as a signed
// value - confirm callers never pass indexes above INT32_MAX.
219 GatherReg<eInstructionSet::eIS_Avx2, ipReal64>::act(
220 const ipReal64* baseAddress,
const __m128i& regIdx,
221 AvxType<ipReal64>::Type& out)
// A by-value copy of the index register is what gets passed to the intrinsic.
223 const __m128i regIdxLocal = regIdx;
// Scale 8: each index is multiplied by 8 bytes, the size of one ipReal64.
224 out = _mm256_i32gather_pd(baseAddress, regIdxLocal, 8);
234 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_GATHERREGIMPL_H__ Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
Advanced Vector Extensions 2.
Definition: InstructionSetTypes.h:48
Definition of import/export macro for library.
unload function; unloads a pack into a memory buffer
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53