15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_GATHERPACKIMPL_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_GATHERPACKIMPL_H__ 35 typename boost::enable_if_c<sizeof(T) == 1>::type
39 BasePack<ePackType::ePT_Avx, T>& out)
41 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
// Overload enabled only when sizeof(T) == 1 (selected through boost::enable_if_c).
// Takes the indexes as a pack of ipUInt32 (`packIdx`) and writes into `out`.
// NOTE(review): the template header, the function name and the first parameter(s)
// are not visible in this excerpt.
51 typename boost::enable_if_c<sizeof(T) == 1>::type
54 const BasePack<ePackType::ePT_Avx, ipUInt32>& packIdx,
55 BasePack<ePackType::ePT_Avx, T>& out)
// A single GatherReg::act call receives all four index registers packIdx._val[0..3].
// NOTE(review): the leading and trailing arguments of this call are cut off in this
// excerpt (presumably a base address and an output register) - confirm against the full file.
57 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
59 packIdx._val[0], packIdx._val[1],
60 packIdx._val[2], packIdx._val[3],
// Overload enabled only when sizeof(T) == 2 (selected through boost::enable_if_c).
// Fills the two registers out._val[0] and out._val[1] with two GatherReg::act calls.
// NOTE(review): the template header, the function name and the declarations of
// `baseAddress` and `indexes` are not visible in this excerpt.
68 typename boost::enable_if_c<sizeof(T) == 2>::type
72 BasePack<ePackType::ePT_Avx, T>& out)
// First call: indexes taken from `indexes`, result stored in out._val[0].
74 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
75 baseAddress, indexes, out._val[0]);
// Second call: indexes taken from `indexes + 16`, result stored in out._val[1]
// (presumably pointer arithmetic over an index array - declaration not visible).
76 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
77 baseAddress, indexes+16, out._val[1]);
// Overload enabled only when sizeof(T) == 2 (selected through boost::enable_if_c).
// Takes the indexes as a pack of ipUInt32 (`packIdx`) and writes into `out`.
// Each GatherReg::act call consumes two index registers and produces one output register.
// NOTE(review): the template header, the function name and the declaration of
// `baseAddress` are not visible in this excerpt.
84 typename boost::enable_if_c<sizeof(T) == 2>::type
87 const BasePack<ePackType::ePT_Avx, ipUInt32>& packIdx,
88 BasePack<ePackType::ePT_Avx, T>& out)
// Index registers 0 and 1 -> out._val[0].
90 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
91 baseAddress, packIdx._val[0], packIdx._val[1], out._val[0]);
// Index registers 2 and 3 -> out._val[1].
92 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
93 baseAddress, packIdx._val[2], packIdx._val[3], out._val[1]);
// Overload enabled only when sizeof(T) == 4 (selected through boost::enable_if_c).
// Reads from `baseAddress` and fills the four registers out._val[0..3], one
// GatherReg::act call per register, advancing `indexes` by 8 between calls.
// NOTE(review): the template header, the function name and the declaration of
// `indexes` are not visible in this excerpt (presumably a pointer to an index array).
100 typename boost::enable_if_c<sizeof(T) == 4>::type
102 const T* baseAddress,
104 BasePack<ePackType::ePT_Avx, T>& out)
// indexes + 0 -> out._val[0]
106 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
107 baseAddress, indexes, out._val[0]);
// indexes + 8 -> out._val[1]
108 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
109 baseAddress, indexes+8, out._val[1]);
// indexes + 16 -> out._val[2]
110 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
111 baseAddress, indexes+16, out._val[2]);
// indexes + 24 -> out._val[3]
112 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
113 baseAddress, indexes+24, out._val[3]);
// Overload enabled only when sizeof(T) == 4 (selected through boost::enable_if_c).
// Takes the indexes as a pack of ipUInt32 (`packIdx`), reads from `baseAddress`
// and writes into `out`. Index register i is passed unchanged to GatherReg::act
// to produce out._val[i], for i = 0..3.
// NOTE(review): the function name and the braces are not visible in this excerpt.
116 template <
typename T>
120 typename boost::enable_if_c<sizeof(T) == 4>::type
122 const T* baseAddress,
123 const BasePack<ePackType::ePT_Avx, ipUInt32>& packIdx,
124 BasePack<ePackType::ePT_Avx, T>& out)
// packIdx._val[0] -> out._val[0]
126 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
127 baseAddress, packIdx._val[0], out._val[0]);
// packIdx._val[1] -> out._val[1]
128 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
129 baseAddress, packIdx._val[1], out._val[1]);
// packIdx._val[2] -> out._val[2]
130 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
131 baseAddress, packIdx._val[2], out._val[2]);
// packIdx._val[3] -> out._val[3]
132 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
133 baseAddress, packIdx._val[3], out._val[3]);
// Overload enabled only when sizeof(T) == 8 (selected through boost::enable_if_c).
// Reads from `baseAddress` and fills the eight registers out._val[0..7], one
// GatherReg::act call per register, advancing `indexes` by 4 between calls
// (offsets 0, 4, 8, ..., 28).
// NOTE(review): the function name, the braces and the declaration of `indexes`
// are not visible in this excerpt (presumably a pointer to an index array).
136 template <
typename T>
140 typename boost::enable_if_c<sizeof(T) == 8>::type
142 const T* baseAddress,
144 BasePack<ePackType::ePT_Avx, T>& out)
// indexes + 0 -> out._val[0]
146 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
147 baseAddress, indexes, out._val[0]);
// indexes + 4 -> out._val[1]
148 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
149 baseAddress, indexes+4, out._val[1]);
// indexes + 8 -> out._val[2]
150 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
151 baseAddress, indexes+8, out._val[2]);
// indexes + 12 -> out._val[3]
152 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
153 baseAddress, indexes+12, out._val[3]);
// indexes + 16 -> out._val[4]
154 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
155 baseAddress, indexes+16, out._val[4]);
// indexes + 20 -> out._val[5]
156 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
157 baseAddress, indexes+20, out._val[5]);
// indexes + 24 -> out._val[6]
158 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
159 baseAddress, indexes+24, out._val[6]);
// indexes + 28 -> out._val[7]
160 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
161 baseAddress, indexes+28, out._val[7]);
// Overload enabled only when sizeof(T) == 8 (selected through boost::enable_if_c).
// Takes the indexes as a pack of ipUInt32 (`packIdx`), reads from `baseAddress`
// and fills the eight registers out._val[0..7].
// Each of the four index registers is split with _mm256_extractf128_si256 into its
// low (selector 0) and high (selector 1) 128-bit half; each half drives one
// GatherReg::act call, so index register k feeds out._val[2k] and out._val[2k+1].
// NOTE(review): the function name and the braces are not visible in this excerpt;
// packIdx._val[i] is presumably a __m256i - confirm against BasePack.
164 template <
typename T>
168 typename boost::enable_if_c<sizeof(T) == 8>::type
170 const T* baseAddress,
171 const BasePack<ePackType::ePT_Avx, ipUInt32>& packIdx,
172 BasePack<ePackType::ePT_Avx, T>& out)
// Index register 0, low half -> out._val[0]
174 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
175 baseAddress, _mm256_extractf128_si256(packIdx._val[0], 0), out._val[0]);
// Index register 0, high half -> out._val[1]
176 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
177 baseAddress, _mm256_extractf128_si256(packIdx._val[0], 1), out._val[1]);
// Index register 1, low half -> out._val[2]
178 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
179 baseAddress, _mm256_extractf128_si256(packIdx._val[1], 0), out._val[2]);
// Index register 1, high half -> out._val[3]
180 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
181 baseAddress, _mm256_extractf128_si256(packIdx._val[1], 1), out._val[3]);
// Index register 2, low half -> out._val[4]
182 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
183 baseAddress, _mm256_extractf128_si256(packIdx._val[2], 0), out._val[4]);
// Index register 2, high half -> out._val[5]
184 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
185 baseAddress, _mm256_extractf128_si256(packIdx._val[2], 1), out._val[5]);
// Index register 3, low half -> out._val[6]
186 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
187 baseAddress, _mm256_extractf128_si256(packIdx._val[3], 0), out._val[6]);
// Index register 3, high half -> out._val[7]
188 GatherReg<eInstructionSet::eIS_Avx2, T>::act(
189 baseAddress, _mm256_extractf128_si256(packIdx._val[3], 1), out._val[7]);
199 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_GATHERPACKIMPL_H__ Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Advanced Vector Extensions 2.
Definition: InstructionSetTypes.h:48
Definition of import/export macro for library.
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53