IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
GatherRegImpl.h
1 // GatherRegImpl.h:
3 // ------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_SSE2_GATHERREGIMPL_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_SSE2_GATHERREGIMPL_H__
17 
22 
23 #include <boost/mpl/and.hpp>
24 #include <boost/type_traits/is_same.hpp>
25 #include <boost/type_traits/is_signed.hpp>
26 
27 namespace ipsdk {
28 namespace simd {
29 namespace detail {
30 
33 
34 template <typename T>
35 IPSDK_FORCEINLINE
36 void
37 GatherReg<eInstructionSet::eIS_Sse2, T,
38  typename boost::enable_if<
39  typename boost::mpl::and_<typename boost::is_integral<T>::type,
40  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<1> >::type
41  >::type
42  >::type
43 >
44 ::act(const T* baseAddress,
45  const ipUInt32* indexes,
46  typename Sse2Type<T>::Type& out)
47 {
48  out = _mm_set_epi8(*(baseAddress + indexes[15]),
49  *(baseAddress + indexes[14]),
50  *(baseAddress + indexes[13]),
51  *(baseAddress + indexes[12]),
52  *(baseAddress + indexes[11]),
53  *(baseAddress + indexes[10]),
54  *(baseAddress + indexes[9]),
55  *(baseAddress + indexes[8]),
56  *(baseAddress + indexes[7]),
57  *(baseAddress + indexes[6]),
58  *(baseAddress + indexes[5]),
59  *(baseAddress + indexes[4]),
60  *(baseAddress + indexes[3]),
61  *(baseAddress + indexes[2]),
62  *(baseAddress + indexes[1]),
63  *(baseAddress + indexes[0]));
64 }
65 
66 template <typename T>
67 IPSDK_FORCEINLINE
68 void
69 GatherReg<eInstructionSet::eIS_Sse2, T,
70  typename boost::enable_if<
71  typename boost::mpl::and_<typename boost::is_integral<T>::type,
72  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<1> >::type
73  >::type
74  >::type
75 >
76 ::act(const T* baseAddress,
77  const Sse2Type<ipUInt32>::Type& regIdx1,
78  const Sse2Type<ipUInt32>::Type& regIdx2,
79  const Sse2Type<ipUInt32>::Type& regIdx3,
80  const Sse2Type<ipUInt32>::Type& regIdx4,
81  typename Sse2Type<T>::Type& out)
82 {
83  ipUInt32 indexes[16];
84  UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx1, indexes);
85  UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx2, indexes+4);
86  UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx3, indexes+8);
87  UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx4, indexes+12);
88  out = _mm_set_epi8(*(baseAddress + indexes[15]),
89  *(baseAddress + indexes[14]),
90  *(baseAddress + indexes[13]),
91  *(baseAddress + indexes[12]),
92  *(baseAddress + indexes[11]),
93  *(baseAddress + indexes[10]),
94  *(baseAddress + indexes[9]),
95  *(baseAddress + indexes[8]),
96  *(baseAddress + indexes[7]),
97  *(baseAddress + indexes[6]),
98  *(baseAddress + indexes[5]),
99  *(baseAddress + indexes[4]),
100  *(baseAddress + indexes[3]),
101  *(baseAddress + indexes[2]),
102  *(baseAddress + indexes[1]),
103  *(baseAddress + indexes[0]));
104 }
105 
106 template <typename T>
107 IPSDK_FORCEINLINE
108 void
109 GatherReg<eInstructionSet::eIS_Sse2, T,
110  typename boost::enable_if<
111  typename boost::mpl::and_<typename boost::is_integral<T>::type,
112  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<2> >::type
113  >::type
114  >::type
115 >
116 ::act(const T* baseAddress,
117  const ipUInt32* indexes,
118  typename Sse2Type<T>::Type& out)
119 {
120  out = _mm_set_epi16(*(baseAddress + indexes[7]),
121  *(baseAddress + indexes[6]),
122  *(baseAddress + indexes[5]),
123  *(baseAddress + indexes[4]),
124  *(baseAddress + indexes[3]),
125  *(baseAddress + indexes[2]),
126  *(baseAddress + indexes[1]),
127  *(baseAddress + indexes[0]));
128 }
129 
130 template <typename T>
131 IPSDK_FORCEINLINE
132 void
133 GatherReg<eInstructionSet::eIS_Sse2, T,
134  typename boost::enable_if<
135  typename boost::mpl::and_<typename boost::is_integral<T>::type,
136  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<2> >::type
137  >::type
138  >::type
139 >
140 ::act(const T* baseAddress,
141  const Sse2Type<ipUInt32>::Type& regIdx1,
142  const Sse2Type<ipUInt32>::Type& regIdx2,
143  typename Sse2Type<T>::Type& out)
144 {
145  ipUInt32 indexes[8];
146  UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx1, indexes);
147  UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx2, indexes+4);
148  out = _mm_set_epi16(*(baseAddress + indexes[7]),
149  *(baseAddress + indexes[6]),
150  *(baseAddress + indexes[5]),
151  *(baseAddress + indexes[4]),
152  *(baseAddress + indexes[3]),
153  *(baseAddress + indexes[2]),
154  *(baseAddress + indexes[1]),
155  *(baseAddress + indexes[0]));
156 }
157 
158 template <typename T>
159 IPSDK_FORCEINLINE
160 void
161 GatherReg<eInstructionSet::eIS_Sse2, T,
162  typename boost::enable_if<
163  typename boost::mpl::and_<typename boost::is_integral<T>::type,
164  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
165  >::type
166  >::type
167 >
168 ::act(const T* baseAddress, const ipUInt32* indexes,
169  typename Sse2Type<T>::Type& out)
170 {
171  out = _mm_set_epi32(*(baseAddress + indexes[3]),
172  *(baseAddress + indexes[2]),
173  *(baseAddress + indexes[1]),
174  *(baseAddress + indexes[0]));
175 }
176 
177 template <typename T>
178 IPSDK_FORCEINLINE
179 void
180 GatherReg<eInstructionSet::eIS_Sse2, T,
181  typename boost::enable_if<
182  typename boost::mpl::and_<typename boost::is_integral<T>::type,
183  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
184  >::type
185  >::type
186 >
187 ::act(const T* baseAddress, const Sse2Type<ipUInt32>::Type& regIdx,
188  typename Sse2Type<T>::Type& out)
189 {
190  out = _mm_setr_epi32(*(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 0))),
191  *(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 4))),
192  *(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 8))),
193  *(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 12))));
194 }
195 
196 // gather implementation for SSE2 for real32 type
197 IPSDK_FORCEINLINE
198 void
199 GatherReg<eInstructionSet::eIS_Sse2, ipReal32>::act(
200  const ipReal32* baseAddress, const ipUInt32* indexes,
201  Sse2Type<ipReal32>::Type& out)
202 {
203  out = _mm_set_ps(*(baseAddress + indexes[3]),
204  *(baseAddress + indexes[2]),
205  *(baseAddress + indexes[1]),
206  *(baseAddress + indexes[0]));
207 }
208 
209 // gather implementation for SSE2 for real32 type
210 IPSDK_FORCEINLINE
211 void
212 GatherReg<eInstructionSet::eIS_Sse2, ipReal32>::act(
213  const ipReal32* baseAddress, const Sse2Type<ipUInt32>::Type& regIdx,
214  Sse2Type<ipReal32>::Type& out)
215 {
216  //ipUInt32 indexes[4];
217  //UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx, indexes);
218  out = _mm_setr_ps(*(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 0))),
219  *(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 4))),
220  *(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 8))),
221  *(baseAddress + _mm_cvtsi128_si32(_mm_srli_si128(regIdx, 12))));
222 }
223 
224 // gather implementation for SSE2 for real64 type
225 IPSDK_FORCEINLINE
226 void
227 GatherReg<eInstructionSet::eIS_Sse2, ipReal64>::act(
228  const ipReal64* baseAddress, const ipUInt32* indexes,
229  Sse2Type<ipReal64>::Type& out)
230 {
231  out = _mm_set_pd(*(baseAddress + indexes[1]),
232  *(baseAddress + indexes[0]));
233 }
234 
237 
238 } // end of namespace detail
239 } // end of namespace simd
240 } // end of namespace ipsdk
241 
242 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_SSE2_GATHERREGIMPL_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
Definition of import/export macro for library.
unload function; unloads a pack into a memory buffer
Streaming SIMD Extensions 2.
Definition: InstructionSetTypes.h:36
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53