15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_SSE2_GATHERREGIMPL_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_SSE2_GATHERREGIMPL_H__ 23 #include <boost/mpl/and.hpp> 24 #include <boost/type_traits/is_same.hpp> 25 #include <boost/type_traits/is_signed.hpp> 38 typename boost::enable_if<
39 typename boost::mpl::and_<typename boost::is_integral<T>::type,
40 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<1> >::type
44 ::act(
const T* baseAddress,
46 typename Sse2Type<T>::Type& out)
// Gather for integral types T with sizeof(T) == 1 (see the enable_if
// condition above): reads sixteen values at baseAddress + indexes[0..15]
// and packs them into out.
// _mm_set_epi8 takes its arguments from the highest byte lane down to the
// lowest, which is why indexes[15] is listed first; the value read through
// indexes[0] ends up in the lowest byte lane of the register.
// NOTE(review): the declaration of indexes is not visible in this listing;
// presumably it is a const ipUInt32* parameter as in the 4-byte overload
// -- confirm against the original header.
48 out = _mm_set_epi8(*(baseAddress + indexes[15]),
49 *(baseAddress + indexes[14]),
50 *(baseAddress + indexes[13]),
51 *(baseAddress + indexes[12]),
52 *(baseAddress + indexes[11]),
53 *(baseAddress + indexes[10]),
54 *(baseAddress + indexes[9]),
55 *(baseAddress + indexes[8]),
56 *(baseAddress + indexes[7]),
57 *(baseAddress + indexes[6]),
58 *(baseAddress + indexes[5]),
59 *(baseAddress + indexes[4]),
60 *(baseAddress + indexes[3]),
61 *(baseAddress + indexes[2]),
62 *(baseAddress + indexes[1]),
63 *(baseAddress + indexes[0]));
70 typename boost::enable_if<
71 typename boost::mpl::and_<typename boost::is_integral<T>::type,
72 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<1> >::type
76 ::act(
const T* baseAddress,
77 const Sse2Type<ipUInt32>::Type& regIdx1,
78 const Sse2Type<ipUInt32>::Type& regIdx2,
79 const Sse2Type<ipUInt32>::Type& regIdx3,
80 const Sse2Type<ipUInt32>::Type& regIdx4,
81 typename Sse2Type<T>::Type& out)
// Register-index gather for integral types T with sizeof(T) == 1: the
// indices are supplied in four SSE2 index registers (regIdx1..regIdx4)
// instead of a memory array.
// Each UnloadReg call receives one index register and a destination at
// offset 0, 4, 8 or 12 within indexes, so the four calls together are used
// to populate indexes[0..15] before the scalar loads below.
// NOTE(review): the declaration of indexes is not visible in this listing;
// presumably a local buffer of 16 ipUInt32 -- confirm against the original.
84 UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx1, indexes);
85 UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx2, indexes+4);
86 UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx3, indexes+8);
87 UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx4, indexes+12);
// _mm_set_epi8 lists lanes from highest to lowest, so the value read through
// indexes[0] is placed in the lowest byte lane of out.
88 out = _mm_set_epi8(*(baseAddress + indexes[15]),
89 *(baseAddress + indexes[14]),
90 *(baseAddress + indexes[13]),
91 *(baseAddress + indexes[12]),
92 *(baseAddress + indexes[11]),
93 *(baseAddress + indexes[10]),
94 *(baseAddress + indexes[9]),
95 *(baseAddress + indexes[8]),
96 *(baseAddress + indexes[7]),
97 *(baseAddress + indexes[6]),
98 *(baseAddress + indexes[5]),
99 *(baseAddress + indexes[4]),
100 *(baseAddress + indexes[3]),
101 *(baseAddress + indexes[2]),
102 *(baseAddress + indexes[1]),
103 *(baseAddress + indexes[0]));
// Gather for integral types T with sizeof(T) == 2: reads eight values at
// baseAddress + indexes[0..7] and packs them into out.
// NOTE(review): the declaration of indexes is not visible in this listing;
// presumably a const ipUInt32* parameter as in the 4-byte overload -- confirm.
106 template <
typename T>
110 typename boost::enable_if<
111 typename boost::mpl::and_<typename boost::is_integral<T>::type,
112 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<2> >::type
116 ::act(
const T* baseAddress,
118 typename Sse2Type<T>::Type& out)
// _mm_set_epi16 takes its arguments from the highest 16-bit lane down to the
// lowest, so the value read through indexes[0] lands in the lowest lane.
120 out = _mm_set_epi16(*(baseAddress + indexes[7]),
121 *(baseAddress + indexes[6]),
122 *(baseAddress + indexes[5]),
123 *(baseAddress + indexes[4]),
124 *(baseAddress + indexes[3]),
125 *(baseAddress + indexes[2]),
126 *(baseAddress + indexes[1]),
127 *(baseAddress + indexes[0]));
// Register-index gather for integral types T with sizeof(T) == 2: the
// indices are supplied in two SSE2 index registers (regIdx1, regIdx2)
// instead of a memory array, and the gathered values are packed into out.
130 template <
typename T>
134 typename boost::enable_if<
135 typename boost::mpl::and_<typename boost::is_integral<T>::type,
136 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<2> >::type
140 ::act(
const T* baseAddress,
141 const Sse2Type<ipUInt32>::Type& regIdx1,
142 const Sse2Type<ipUInt32>::Type& regIdx2,
143 typename Sse2Type<T>::Type& out)
// The two UnloadReg calls receive destinations at offsets 0 and 4 within
// indexes, so together they are used to populate indexes[0..7].
// NOTE(review): the declaration of indexes is not visible in this listing;
// presumably a local buffer of 8 ipUInt32 -- confirm against the original.
146 UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx1, indexes);
147 UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx2, indexes+4);
// _mm_set_epi16 lists lanes from highest to lowest, so the value read
// through indexes[0] is placed in the lowest 16-bit lane of out.
148 out = _mm_set_epi16(*(baseAddress + indexes[7]),
149 *(baseAddress + indexes[6]),
150 *(baseAddress + indexes[5]),
151 *(baseAddress + indexes[4]),
152 *(baseAddress + indexes[3]),
153 *(baseAddress + indexes[2]),
154 *(baseAddress + indexes[1]),
155 *(baseAddress + indexes[0]));
// Gather for integral types T with sizeof(T) == 4: reads four values at
// baseAddress + indexes[0..3] and packs them into out.
//
// baseAddress : base pointer the element offsets are added to
// indexes     : array of element offsets; entries 0..3 are read
// out         : receives the packed register
158 template <
typename T>
162 typename boost::enable_if<
163 typename boost::mpl::and_<typename boost::is_integral<T>::type,
164 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
168 ::act(
const T* baseAddress,
const ipUInt32* indexes,
169 typename Sse2Type<T>::Type& out)
// _mm_set_epi32 takes its arguments from the highest 32-bit lane down to the
// lowest, so the value read through indexes[0] lands in the lowest lane.
171 out = _mm_set_epi32(*(baseAddress + indexes[3]),
172 *(baseAddress + indexes[2]),
173 *(baseAddress + indexes[1]),
174 *(baseAddress + indexes[0]));
// Register-index gather for integral types T with sizeof(T) == 4: the four
// element offsets are taken directly from the lanes of regIdx and the
// gathered values are packed into out.
//
// baseAddress : base pointer the element offsets are added to
// regIdx      : SSE2 register holding the four 32-bit element offsets
// out         : receives the packed register
177 template <
typename T>
181 typename boost::enable_if<
182 typename boost::mpl::and_<typename boost::is_integral<T>::type,
183 typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
187 ::act(
const T* baseAddress,
const Sse2Type<ipUInt32>::Type& regIdx,
188 typename Sse2Type<T>::Type& out)
// Lane k of regIdx is extracted by shifting the register right by 4*k bytes
// (_mm_srli_si128) and reading the low 32 bits (_mm_cvtsi128_si32); the
// shift by 0 leaves the register unchanged and mirrors the other lanes.
// _mm_setr_epi32 takes its arguments lowest lane first, so the value
// addressed by lane 0 of regIdx is stored in lane 0 of out.
// NOTE(review): _mm_cvtsi128_si32 returns a signed int, so an index with its
// top bit set would be applied as a negative offset, unlike the
// const ipUInt32* overload -- confirm indices always stay below 2^31.
190 out = _mm_setr_epi32(*(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 0))),
191 *(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 4))),
192 *(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 8))),
193 *(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 12))));
199 GatherReg<eInstructionSet::eIS_Sse2, ipReal32>::act(
201 Sse2Type<ipReal32>::Type& out)
// ipReal32 gather: reads four floats at baseAddress + indexes[0..3] and
// packs them into out.
// _mm_set_ps takes its arguments from the highest lane down to the lowest,
// so the value read through indexes[0] lands in the lowest lane.
// NOTE(review): the baseAddress/indexes parameter line is not visible in
// this listing; presumably (const ipReal32*, const ipUInt32*) -- confirm.
203 out = _mm_set_ps(*(baseAddress + indexes[3]),
204 *(baseAddress + indexes[2]),
205 *(baseAddress + indexes[1]),
206 *(baseAddress + indexes[0]));
212 GatherReg<eInstructionSet::eIS_Sse2, ipReal32>::act(
213 const ipReal32* baseAddress,
const Sse2Type<ipUInt32>::Type& regIdx,
214 Sse2Type<ipReal32>::Type& out)
// ipReal32 register-index gather: the four element offsets are taken from
// the lanes of regIdx and the gathered floats are packed into out.
// Lane k of regIdx is extracted by shifting the register right by 4*k bytes
// (_mm_srli_si128) and reading the low 32 bits (_mm_cvtsi128_si32).
// _mm_setr_ps takes its arguments lowest lane first, so the value addressed
// by lane 0 of regIdx is stored in lane 0 of out.
// NOTE(review): _mm_cvtsi128_si32 returns a signed int although regIdx is
// typed as a register of ipUInt32, so an index with its top bit set would be
// applied as a negative offset -- confirm indices always stay below 2^31.
218 out = _mm_setr_ps(*(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 0))),
219 *(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 4))),
220 *(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 8))),
221 *(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 12))));
227 GatherReg<eInstructionSet::eIS_Sse2, ipReal64>::act(
229 Sse2Type<ipReal64>::Type& out)
// ipReal64 gather: reads two doubles at baseAddress + indexes[0..1] and
// packs them into out.
// _mm_set_pd takes the high lane first, so the value read through
// indexes[0] lands in the low lane.
// NOTE(review): the baseAddress/indexes parameter line is not visible in
// this listing; presumably (const ipReal64*, const ipUInt32*) -- confirm.
231 out = _mm_set_pd(*(baseAddress + indexes[1]),
232 *(baseAddress + indexes[0]));
242 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_SSE2_GATHERREGIMPL_H__ Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
Definition of import/export macro for library.
unload function; unloads a pack into a memory buffer
Streaming SIMD Extensions 2.
Definition: InstructionSetTypes.h:36
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53