15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_GATHERPACKIMPL_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_GATHERPACKIMPL_H__ 34 typename boost::enable_if_c<sizeof(T) == 1>::type
38 BasePack<ePackType::ePT_Avx512, T>& out)
40 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
/// Overload enabled only when sizeof(T) == 1; takes its gather indices as a
/// pack of ipUInt32 registers (packIdx) and writes into `out`.
/// The visible part of the call hands packIdx._val[0] .. packIdx._val[3] to
/// GatherReg<eInstructionSet::eIS_Avx512, T>::act.
/// NOTE(review): the function name, the leading call argument and the output
/// argument sit on lines that are not visible here -- confirm against the
/// full header.
50 typename boost::enable_if_c<sizeof(T) == 1>::type
53 const BasePack<ePackType::ePT_Avx512, ipUInt32>& packIdx,
54 BasePack<ePackType::ePT_Avx512, T>& out)
56 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
58 packIdx._val[0], packIdx._val[1],
59 packIdx._val[2], packIdx._val[3],
/// Overload enabled only when sizeof(T) == 2.
/// Fills out._val[0] and out._val[1] with two calls to
/// GatherReg<eInstructionSet::eIS_Avx512, T>::act: the first reads its
/// indices starting at `indexes`, the second starting at `indexes+32`.
/// NOTE(review): the declarations of `baseAddress` and `indexes` are not
/// visible here; the +32 step presumably corresponds to 32 two-byte lanes per
/// 512-bit register -- confirm.
67 typename boost::enable_if_c<sizeof(T) == 2>::type
71 BasePack<ePackType::ePT_Avx512, T>& out)
73 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
74 baseAddress, indexes, out._val[0]);
// Second output register: continue 32 indices further on.
75 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
76 baseAddress, indexes+32, out._val[1]);
/// Overload enabled only when sizeof(T) == 2; takes its gather indices as a
/// pack of ipUInt32 registers (packIdx).
/// Each output register is produced from two index registers:
/// packIdx._val[0] and packIdx._val[1] feed out._val[0],
/// packIdx._val[2] and packIdx._val[3] feed out._val[1].
/// NOTE(review): the declaration of `baseAddress` is not visible here.
83 typename boost::enable_if_c<sizeof(T) == 2>::type
86 const BasePack<ePackType::ePT_Avx512, ipUInt32>& packIdx,
87 BasePack<ePackType::ePT_Avx512, T>& out)
89 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
90 baseAddress, packIdx._val[0], packIdx._val[1], out._val[0]);
91 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
92 baseAddress, packIdx._val[2], packIdx._val[3], out._val[1]);
/// Overload enabled only when sizeof(T) == 4.
/// Fills out._val[0] .. out._val[3] with four calls to
/// GatherReg<eInstructionSet::eIS_Avx512, T>::act, reading indices at
/// `indexes`, `indexes+16`, `indexes+32` and `indexes+48` respectively.
/// @param baseAddress  pointer to T forwarded unchanged to every call
/// @param out          pack receiving the four results
/// NOTE(review): the declaration of `indexes` is not visible here; the
/// +16 step presumably corresponds to 16 four-byte lanes per 512-bit
/// register -- confirm.
99 typename boost::enable_if_c<sizeof(T) == 4>::type
101 const T* baseAddress,
103 BasePack<ePackType::ePT_Avx512, T>& out)
105 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
106 baseAddress, indexes, out._val[0]);
107 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
108 baseAddress, indexes+16, out._val[1]);
109 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
110 baseAddress, indexes+32, out._val[2]);
111 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
112 baseAddress, indexes+48, out._val[3]);
/// Overload enabled only when sizeof(T) == 4; takes its gather indices as a
/// pack of ipUInt32 registers (packIdx).
/// Index and output registers map one to one: packIdx._val[k] is passed to
/// GatherReg<eInstructionSet::eIS_Avx512, T>::act together with out._val[k],
/// for k = 0 .. 3.
/// @param baseAddress  pointer to T forwarded unchanged to every call
/// @param packIdx      pack holding the index registers
/// @param out          pack receiving the four results
115 template <
typename T>
119 typename boost::enable_if_c<sizeof(T) == 4>::type
121 const T* baseAddress,
122 const BasePack<ePackType::ePT_Avx512, ipUInt32>& packIdx,
123 BasePack<ePackType::ePT_Avx512, T>& out)
125 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
126 baseAddress, packIdx._val[0], out._val[0]);
127 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
128 baseAddress, packIdx._val[1], out._val[1]);
129 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
130 baseAddress, packIdx._val[2], out._val[2]);
131 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
132 baseAddress, packIdx._val[3], out._val[3]);
/// Overload enabled only when sizeof(T) == 8.
/// Fills out._val[0] .. out._val[7] with eight calls to
/// GatherReg<eInstructionSet::eIS_Avx512, T>::act; call k reads its indices
/// starting at `indexes + 8*k`.
/// @param baseAddress  pointer to T forwarded unchanged to every call
/// @param out          pack receiving the eight results
/// NOTE(review): the declaration of `indexes` is not visible here; the
/// +8 step presumably corresponds to 8 eight-byte lanes per 512-bit
/// register -- confirm.
135 template <
typename T>
139 typename boost::enable_if_c<sizeof(T) == 8>::type
141 const T* baseAddress,
143 BasePack<ePackType::ePT_Avx512, T>& out)
145 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
146 baseAddress, indexes, out._val[0]);
147 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
148 baseAddress, indexes+8, out._val[1]);
149 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
150 baseAddress, indexes+16, out._val[2]);
151 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
152 baseAddress, indexes+24, out._val[3]);
153 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
154 baseAddress, indexes+32, out._val[4]);
155 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
156 baseAddress, indexes+40, out._val[5]);
157 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
158 baseAddress, indexes+48, out._val[6]);
159 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
160 baseAddress, indexes+56, out._val[7]);
/// Overload enabled only when sizeof(T) == 8; takes its gather indices as a
/// pack of ipUInt32 registers (packIdx).
/// Each packIdx register is consumed in two steps: its lower 256 bits are
/// passed directly (via _mm512_castsi512_si256), then its upper eight 32-bit
/// lanes are moved down with _mm512_permutex2var_epi32 and passed the same
/// way. The lower-half calls write out._val[0], [2], [4] and [6].
/// @param baseAddress  pointer to T passed to the gather calls
/// @param packIdx      pack holding the four index registers
/// @param out          pack receiving the results
/// NOTE(review): the leading and trailing arguments of the upper-half calls
/// sit on lines that are not visible here; presumably they are `baseAddress`
/// and out._val[1], [3], [5], [7] -- confirm against the full header.
163 template <
typename T>
167 typename boost::enable_if_c<sizeof(T) == 8>::type
169 const T* baseAddress,
170 const BasePack<ePackType::ePT_Avx512, ipUInt32>& packIdx,
171 BasePack<ePackType::ePT_Avx512, T>& out)
// Lane selector for the permutes below: entries 8..15 pick 32-bit lanes
// 8..15 of the first permute operand (both operands are the same register
// in every use), so the upper half of a packIdx register ends up in the
// lower 256 bits that _mm512_castsi512_si256 keeps.
173 const __m512i duplicateHiMask = _mm512_setr_epi32(
174 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
// packIdx._val[0]: lower half, then upper half
176 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
177 baseAddress, _mm512_castsi512_si256(packIdx._val[0]), out._val[0]);
178 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
180 _mm512_castsi512_si256(_mm512_permutex2var_epi32(
181 packIdx._val[0], duplicateHiMask, packIdx._val[0])),
// packIdx._val[1]: lower half, then upper half
183 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
184 baseAddress, _mm512_castsi512_si256(packIdx._val[1]), out._val[2]);
185 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
187 _mm512_castsi512_si256(_mm512_permutex2var_epi32(
188 packIdx._val[1], duplicateHiMask, packIdx._val[1])),
// packIdx._val[2]: lower half, then upper half
190 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
191 baseAddress, _mm512_castsi512_si256(packIdx._val[2]), out._val[4]);
192 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
194 _mm512_castsi512_si256(_mm512_permutex2var_epi32(
195 packIdx._val[2], duplicateHiMask, packIdx._val[2])),
// packIdx._val[3]: lower half, then upper half
197 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
198 baseAddress, _mm512_castsi512_si256(packIdx._val[3]), out._val[6]);
199 GatherReg<eInstructionSet::eIS_Avx512, T>::act(
201 _mm512_castsi512_si256(_mm512_permutex2var_epi32(
202 packIdx._val[3], duplicateHiMask, packIdx._val[3])),
213 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_GATHERPACKIMPL_H__ Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
(including foundation and byte and word instructions)
Definition: InstructionSetTypes.h:51
Definition of import/export macro for library.
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53