IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
ScatterRegImpl.h
1 // ScatterRegImpl.h:
3 // ------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_SSE2_SCATTERREGIMPL_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_SSE2_SCATTERREGIMPL_H__
17 
22 
23 #include <boost/mpl/and.hpp>
24 #include <boost/type_traits/is_same.hpp>
25 #include <boost/type_traits/is_signed.hpp>
26 
27 namespace ipsdk {
28 namespace simd {
29 namespace detail {
30 
33 
34 template <typename T>
35 IPSDK_FORCEINLINE
36 void
37 ScatterReg<eInstructionSet::eIS_Sse2, T,
38  typename boost::enable_if<
39  typename boost::mpl::and_<typename boost::is_integral<T>::type,
40  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<1> >::type
41  >::type
42  >::type
43 >
44 ::act(const typename Sse2Type<T>::Type& in,
45  const ipUInt32* indexes,
46  T* const outBaseAddr)
47 {
48  T buffer[16];
49  _mm_storeu_si128(reinterpret_cast<typename Sse2Type<T>::Type*>(buffer), in);
50 
51  outBaseAddr[indexes[0]] = buffer[0];
52  outBaseAddr[indexes[1]] = buffer[1];
53  outBaseAddr[indexes[2]] = buffer[2];
54  outBaseAddr[indexes[3]] = buffer[3];
55  outBaseAddr[indexes[4]] = buffer[4];
56  outBaseAddr[indexes[5]] = buffer[5];
57  outBaseAddr[indexes[6]] = buffer[6];
58  outBaseAddr[indexes[7]] = buffer[7];
59  outBaseAddr[indexes[8]] = buffer[8];
60  outBaseAddr[indexes[9]] = buffer[9];
61  outBaseAddr[indexes[10]] = buffer[10];
62  outBaseAddr[indexes[11]] = buffer[11];
63  outBaseAddr[indexes[12]] = buffer[12];
64  outBaseAddr[indexes[13]] = buffer[13];
65  outBaseAddr[indexes[14]] = buffer[14];
66  outBaseAddr[indexes[15]] = buffer[15];
67 }
68 
69 template <typename T>
70 IPSDK_FORCEINLINE
71 void
72 ScatterReg<eInstructionSet::eIS_Sse2, T,
73  typename boost::enable_if<
74  typename boost::mpl::and_<typename boost::is_integral<T>::type,
75  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<1> >::type
76  >::type
77  >::type
78 >
79 ::act(const typename Sse2Type<T>::Type& in,
80  const Sse2Type<ipUInt32>::Type& regIdx1,
81  const Sse2Type<ipUInt32>::Type& regIdx2,
82  const Sse2Type<ipUInt32>::Type& regIdx3,
83  const Sse2Type<ipUInt32>::Type& regIdx4,
84  T* const outBaseAddr)
85 {
86  ipUInt32 indexes[16];
87  UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx1, indexes);
88  UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx2, indexes+4);
89  UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx3, indexes+8);
90  UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx4, indexes+12);
91 
92  ScatterReg<eInstructionSet::eIS_Sse2, T>::act(in, indexes, outBaseAddr);
93 }
94 
95 template <typename T>
96 IPSDK_FORCEINLINE
97 void
98 ScatterReg<eInstructionSet::eIS_Sse2, T,
99  typename boost::enable_if<
100  typename boost::mpl::and_<typename boost::is_integral<T>::type,
101  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<2> >::type
102  >::type
103  >::type
104 >
105 ::act(const typename Sse2Type<T>::Type& in,
106  const ipUInt32* indexes,
107  T* const outBaseAddr)
108 {
109  T buffer[8];
110  _mm_storeu_si128(reinterpret_cast<typename Sse2Type<T>::Type*>(buffer), in);
111 
112  outBaseAddr[indexes[0]] = buffer[0];
113  outBaseAddr[indexes[1]] = buffer[1];
114  outBaseAddr[indexes[2]] = buffer[2];
115  outBaseAddr[indexes[3]] = buffer[3];
116  outBaseAddr[indexes[4]] = buffer[4];
117  outBaseAddr[indexes[5]] = buffer[5];
118  outBaseAddr[indexes[6]] = buffer[6];
119  outBaseAddr[indexes[7]] = buffer[7];
120 }
121 
122 template <typename T>
123 IPSDK_FORCEINLINE
124 void
125 ScatterReg<eInstructionSet::eIS_Sse2, T,
126  typename boost::enable_if<
127  typename boost::mpl::and_<typename boost::is_integral<T>::type,
128  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<2> >::type
129  >::type
130  >::type
131 >
132 ::act(const typename Sse2Type<T>::Type& in,
133  const Sse2Type<ipUInt32>::Type& regIdx1,
134  const Sse2Type<ipUInt32>::Type& regIdx2,
135  T* const outBaseAddr)
136 {
137  ipUInt32 indexes[8];
138  UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx1, indexes);
139  UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx2, indexes+4);
140 
141  ScatterReg<eInstructionSet::eIS_Sse2, T>::act(in, indexes, outBaseAddr);
142 }
143 
144 template <typename T>
145 IPSDK_FORCEINLINE
146 void
147 ScatterReg<eInstructionSet::eIS_Sse2, T,
148  typename boost::enable_if<
149  typename boost::mpl::and_<typename boost::is_integral<T>::type,
150  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
151  >::type
152  >::type
153 >
154 ::act(const typename Sse2Type<T>::Type& in, const ipUInt32* indexes,
155  T* const outBaseAddr)
156 {
157  T buffer[4];
158  _mm_storeu_si128(reinterpret_cast<typename Sse2Type<T>::Type*>(buffer), in);
159 
160  outBaseAddr[indexes[0]] = buffer[0];
161  outBaseAddr[indexes[1]] = buffer[1];
162  outBaseAddr[indexes[2]] = buffer[2];
163  outBaseAddr[indexes[3]] = buffer[3];
164 }
165 
166 template <typename T>
167 IPSDK_FORCEINLINE
168 void
169 ScatterReg<eInstructionSet::eIS_Sse2, T,
170  typename boost::enable_if<
171  typename boost::mpl::and_<typename boost::is_integral<T>::type,
172  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
173  >::type
174  >::type
175 >
176 ::act(
177  const typename Sse2Type<T>::Type& in,
178  const Sse2Type<ipUInt32>::Type& regIdx,
179  T* const outBaseAddr)
180 {
181  ipUInt32 indexes[4];
182  UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx, indexes);
183 
184  ScatterReg<eInstructionSet::eIS_Sse2, T>::act(in, indexes, outBaseAddr);
185 }
186 
187 // Scatter implementation for SSE2 for real32 type
188 IPSDK_FORCEINLINE
189 void
190 ScatterReg<eInstructionSet::eIS_Sse2, ipReal32>::act(
191  const Sse2Type<ipReal32>::Type& in,
192  const ipUInt32* indexes,
193  ipReal32* const outBaseAddr)
194 {
195  ipReal32 buffer[4];
196  _mm_storeu_ps(buffer, in);
197  outBaseAddr[indexes[0]] = buffer[0];
198  outBaseAddr[indexes[1]] = buffer[1];
199  outBaseAddr[indexes[2]] = buffer[2];
200  outBaseAddr[indexes[3]] = buffer[3];
201 }
202 
203 // Scatter implementation for SSE2 for real32 type
204 IPSDK_FORCEINLINE
205 void
206 ScatterReg<eInstructionSet::eIS_Sse2, ipReal32>::act(
207  const Sse2Type<ipReal32>::Type& in,
208  const Sse2Type<ipUInt32>::Type& regIdx,
209  ipReal32* const outBaseAddr)
210 {
211  ipUInt32 indexes[4];
212  UnloadReg<eInstructionSet::eIS_Sse2, ipUInt32>::act(regIdx, indexes);
213  ScatterReg<eInstructionSet::eIS_Sse2, ipReal32>::act(in, indexes, outBaseAddr);
214 }
215 
216 // Scatter implementation for SSE2 for real64 type
217 IPSDK_FORCEINLINE
218 void
219 ScatterReg<eInstructionSet::eIS_Sse2, ipReal64>::act(
220  const Sse2Type<ipReal64>::Type& in,
221  const ipUInt32* indexes,
222  ipReal64* const outBaseAddr)
223 {
224  ipReal64 buffer[2];
225  _mm_storeu_pd(buffer, in);
226 
227  outBaseAddr[indexes[0]] = buffer[0];
228  outBaseAddr[indexes[1]] = buffer[1];
229 }
230 
233 
234 } // end of namespace detail
235 } // end of namespace simd
236 } // end of namespace ipsdk
237 
238 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_SSE2_SCATTERREGIMPL_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
Definition of import/export macro for library.
unload function; unloads a pack into a memory buffer
Streaming SIMD Extensions 2.
Definition: InstructionSetTypes.h:36
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53