IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
ScatterRegImpl.h
1 // ScatterRegImpl.h:
3 // ------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_SCATTERREGIMPL_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_SCATTERREGIMPL_H__
17 
22 
23 #include <boost/mpl/and.hpp>
24 #include <boost/type_traits/is_same.hpp>
25 #include <boost/type_traits/is_signed.hpp>
26 
27 namespace ipsdk {
28 namespace simd {
29 namespace detail {
30 
33 
34 template <typename T>
35 IPSDK_FORCEINLINE
36 void
37 ScatterReg<eInstructionSet::eIS_Avx2, T,
38  typename boost::enable_if<
39  typename boost::mpl::and_<typename boost::is_integral<T>::type,
40  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<1> >::type
41  >::type
42  >::type
43 >
44 ::act(const typename AvxType<T>::Type& in,
45  const ipUInt32* indexes,
46  T* const outBaseAddr)
47 {
48  T buffer[32];
49  _mm256_storeu_si256(reinterpret_cast<typename AvxType<T>::Type*>(buffer), in);
50 
51  /*outBaseAddr[indexes[0]] = buffer[0];
52  outBaseAddr[indexes[1]] = buffer[1];
53  outBaseAddr[indexes[2]] = buffer[2];
54  outBaseAddr[indexes[3]] = buffer[3];
55  outBaseAddr[indexes[4]] = buffer[4];
56  outBaseAddr[indexes[5]] = buffer[5];
57  outBaseAddr[indexes[6]] = buffer[6];
58  outBaseAddr[indexes[7]] = buffer[7];
59  outBaseAddr[indexes[8]] = buffer[8];
60  outBaseAddr[indexes[9]] = buffer[9];
61  outBaseAddr[indexes[10]] = buffer[10];
62  outBaseAddr[indexes[11]] = buffer[11];
63  outBaseAddr[indexes[12]] = buffer[12];
64  outBaseAddr[indexes[13]] = buffer[13];
65  outBaseAddr[indexes[14]] = buffer[14];
66  outBaseAddr[indexes[15]] = buffer[15];
67  outBaseAddr[indexes[16]] = buffer[16];
68  outBaseAddr[indexes[17]] = buffer[17];
69  outBaseAddr[indexes[18]] = buffer[18];
70  outBaseAddr[indexes[19]] = buffer[19];
71  outBaseAddr[indexes[20]] = buffer[20];
72  outBaseAddr[indexes[21]] = buffer[21];
73  outBaseAddr[indexes[22]] = buffer[22];
74  outBaseAddr[indexes[23]] = buffer[23];
75  outBaseAddr[indexes[24]] = buffer[24];
76  outBaseAddr[indexes[25]] = buffer[25];
77  outBaseAddr[indexes[26]] = buffer[26];
78  outBaseAddr[indexes[27]] = buffer[27];
79  outBaseAddr[indexes[28]] = buffer[28];
80  outBaseAddr[indexes[29]] = buffer[29];
81  outBaseAddr[indexes[30]] = buffer[30];
82  outBaseAddr[indexes[31]] = buffer[31];*/
83 
84  for(ipsdk::ipUInt8 i=0; i<32; ++i)
85  outBaseAddr[indexes[i]] = buffer[i];
86 }
87 
88 template <typename T>
89 IPSDK_FORCEINLINE
90 void
91 ScatterReg<eInstructionSet::eIS_Avx2, T,
92  typename boost::enable_if<
93  typename boost::mpl::and_<typename boost::is_integral<T>::type,
94  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<1> >::type
95  >::type
96  >::type
97 >
98 ::act(const typename AvxType<T>::Type& in,
99  const AvxType<ipUInt32>::Type& regIdx1,
100  const AvxType<ipUInt32>::Type& regIdx2,
101  const AvxType<ipUInt32>::Type& regIdx3,
102  const AvxType<ipUInt32>::Type& regIdx4,
103  T* const outBaseAddr)
104 {
105  ipUInt32 indexes[32];
106  UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx1, indexes);
107  UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx2, indexes+8);
108  UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx3, indexes+16);
109  UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx4, indexes+24);
110 
111  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(in, indexes, outBaseAddr);
112 }
113 
114 template <typename T>
115 IPSDK_FORCEINLINE
116 void
117 ScatterReg<eInstructionSet::eIS_Avx2, T,
118  typename boost::enable_if<
119  typename boost::mpl::and_<typename boost::is_integral<T>::type,
120  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<2> >::type
121  >::type
122  >::type
123 >
124 ::act(const typename AvxType<T>::Type& in,
125  const ipUInt32* indexes,
126  T* const outBaseAddr)
127 {
128  T buffer[16];
129  _mm256_storeu_si256(reinterpret_cast<typename AvxType<T>::Type*>(buffer), in);
130 
131  /*outBaseAddr[indexes[0]] = buffer[0];
132  outBaseAddr[indexes[1]] = buffer[1];
133  outBaseAddr[indexes[2]] = buffer[2];
134  outBaseAddr[indexes[3]] = buffer[3];
135  outBaseAddr[indexes[4]] = buffer[4];
136  outBaseAddr[indexes[5]] = buffer[5];
137  outBaseAddr[indexes[6]] = buffer[6];
138  outBaseAddr[indexes[7]] = buffer[7];
139  outBaseAddr[indexes[8]] = buffer[8];
140  outBaseAddr[indexes[9]] = buffer[9];
141  outBaseAddr[indexes[10]] = buffer[10];
142  outBaseAddr[indexes[11]] = buffer[11];
143  outBaseAddr[indexes[12]] = buffer[12];
144  outBaseAddr[indexes[13]] = buffer[13];
145  outBaseAddr[indexes[14]] = buffer[14];
146  outBaseAddr[indexes[15]] = buffer[15];*/
147  for(ipsdk::ipUInt8 i=0; i<16; ++i)
148  outBaseAddr[indexes[i]] = buffer[i];
149 }
150 
151 template <typename T>
152 IPSDK_FORCEINLINE
153 void
154 ScatterReg<eInstructionSet::eIS_Avx2, T,
155  typename boost::enable_if<
156  typename boost::mpl::and_<typename boost::is_integral<T>::type,
157  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<2> >::type
158  >::type
159  >::type
160 >
161 ::act(const typename AvxType<T>::Type& in,
162  const AvxType<ipUInt32>::Type& regIdx1,
163  const AvxType<ipUInt32>::Type& regIdx2,
164  T* const outBaseAddr)
165 {
166  ipUInt32 indexes[16];
167  UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx1, indexes);
168  UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx2, indexes+8);
169 
170  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(in, indexes, outBaseAddr);
171 }
172 
173 template <typename T>
174 IPSDK_FORCEINLINE
175 void
176 ScatterReg<eInstructionSet::eIS_Avx2, T,
177  typename boost::enable_if<
178  typename boost::mpl::and_<typename boost::is_integral<T>::type,
179  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
180  >::type
181  >::type
182 >
183 ::act(const typename AvxType<T>::Type& in, const ipUInt32* indexes,
184  T* const outBaseAddr)
185 {
186  T buffer[8];
187  _mm256_storeu_si256(reinterpret_cast<typename AvxType<T>::Type*>(buffer), in);
188 
189  /*outBaseAddr[indexes[0]] = buffer[0];
190  outBaseAddr[indexes[1]] = buffer[1];
191  outBaseAddr[indexes[2]] = buffer[2];
192  outBaseAddr[indexes[3]] = buffer[3];
193  outBaseAddr[indexes[4]] = buffer[4];
194  outBaseAddr[indexes[5]] = buffer[5];
195  outBaseAddr[indexes[6]] = buffer[6];
196  outBaseAddr[indexes[7]] = buffer[7];*/
197  for(ipsdk::ipUInt8 i=0; i<8; ++i)
198  outBaseAddr[indexes[i]] = buffer[i];
199 }
200 
201 template <typename T>
202 IPSDK_FORCEINLINE
203 void
204 ScatterReg<eInstructionSet::eIS_Avx2, T,
205  typename boost::enable_if<
206  typename boost::mpl::and_<typename boost::is_integral<T>::type,
207  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
208  >::type
209  >::type
210 >
211 ::act(
212  const typename AvxType<T>::Type& in,
213  const AvxType<ipUInt32>::Type& regIdx,
214  T* const outBaseAddr)
215 {
216  ipUInt32 indexes[8];
217  UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx, indexes);
218 
219  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(in, indexes, outBaseAddr);
220 }
221 
222 // Scatter implementation for AVX2 for real32 type
223 IPSDK_FORCEINLINE
224 void
225 ScatterReg<eInstructionSet::eIS_Avx2, ipReal32>::act(
226  const AvxType<ipReal32>::Type& in,
227  const ipUInt32* indexes,
228  ipReal32* const outBaseAddr)
229 {
230  ipReal32 buffer[8];
231  _mm256_storeu_ps(buffer, in);
232 
233  /*outBaseAddr[indexes[0]] = buffer[0];
234  outBaseAddr[indexes[1]] = buffer[1];
235  outBaseAddr[indexes[2]] = buffer[2];
236  outBaseAddr[indexes[3]] = buffer[3];
237  outBaseAddr[indexes[4]] = buffer[4];
238  outBaseAddr[indexes[5]] = buffer[5];
239  outBaseAddr[indexes[6]] = buffer[6];
240  outBaseAddr[indexes[7]] = buffer[7];*/
241  for(ipsdk::ipUInt8 i=0; i<8; ++i)
242  outBaseAddr[indexes[i]] = buffer[i];
243 }
244 
245 // Scatter implementation for AVX2 for real32 type
246 IPSDK_FORCEINLINE
247 void
248 ScatterReg<eInstructionSet::eIS_Avx2, ipReal32>::act(
249  const AvxType<ipReal32>::Type& in,
250  const AvxType<ipUInt32>::Type& regIdx,
251  ipReal32* const outBaseAddr)
252 {
253  ipUInt32 indexes[8];
254  UnloadReg<eInstructionSet::eIS_Avx2, ipUInt32>::act(regIdx, indexes);
255  ScatterReg<eInstructionSet::eIS_Avx2, ipReal32>::act(in, indexes, outBaseAddr);
256 }
257 
258 // Scatter implementation for AVX2 for real64 type
259 IPSDK_FORCEINLINE
260 void
261 ScatterReg<eInstructionSet::eIS_Avx2, ipReal64>::act(
262  const AvxType<ipReal64>::Type& in,
263  const ipUInt32* indexes,
264  ipReal64* const outBaseAddr)
265 {
266  ipReal64 buffer[4];
267  _mm256_storeu_pd(buffer, in);
268 
269  /*outBaseAddr[indexes[0]] = buffer[0];
270  outBaseAddr[indexes[1]] = buffer[1];
271  outBaseAddr[indexes[2]] = buffer[2];
272  outBaseAddr[indexes[3]] = buffer[3];*/
273  for(ipsdk::ipUInt8 i=0; i<4; ++i)
274  outBaseAddr[indexes[i]] = buffer[i];
275 }
276 
279 
280 } // end of namespace detail
281 } // end of namespace simd
282 } // end of namespace ipsdk
283 
284 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_SCATTERREGIMPL_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
uint8_t ipUInt8
Base types definition.
Definition: BaseTypes.h:49
Advanced Vector Extensions 2.
Definition: InstructionSetTypes.h:48
Definition of import/export macro for library.
unload function; unloads a pack into a memory buffer
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53