IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
ScatterPackImpl.h
Go to the documentation of this file.
1 // ScatterPackImpl.h:
3 // ------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_SCATTERPACKIMPL_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_SCATTERPACKIMPL_H__
17 
23 
24 namespace ipsdk {
25 namespace simd {
26 namespace detail {
27 
30 
31 template <typename T>
32 IPSDK_FORCEINLINE
33 void
34 ScatterPack<eInstructionSet::eIS_Avx2, T,
35  typename boost::enable_if_c<sizeof(T) == 1>::type
36 >::act(
37  const BasePack<ePackType::ePT_Avx, T>& in,
38  const ipUInt32* indexes,
39  T* const outBaseAddr)
40 {
41  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
42  in._val[0],
43  indexes,
44  outBaseAddr);
45 }
46 
47 template <typename T>
48 IPSDK_FORCEINLINE
49 void
50 ScatterPack<eInstructionSet::eIS_Avx2, T,
51  typename boost::enable_if_c<sizeof(T) == 1>::type
52 >::act(
53  const BasePack<ePackType::ePT_Avx, T>& in,
54  const BasePack<ePackType::ePT_Avx, ipUInt32>& packIdx,
55  T* const outBaseAddr)
56 {
57  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
58  in._val[0],
59  packIdx._val[0], packIdx._val[1],
60  packIdx._val[2], packIdx._val[3],
61  outBaseAddr);
62 }
63 
64 template <typename T>
65 IPSDK_FORCEINLINE
66 void
67 ScatterPack<eInstructionSet::eIS_Avx2, T,
68  typename boost::enable_if_c<sizeof(T) == 2>::type
69 >::act(
70  const BasePack<ePackType::ePT_Avx, T>& in,
71  const ipUInt32* indexes,
72  T* const outBaseAddr)
73 {
74  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
75  in._val[0], indexes, outBaseAddr);
76  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
77  in._val[1], indexes+16, outBaseAddr);
78 }
79 
80 template <typename T>
81 IPSDK_FORCEINLINE
82 void
83 ScatterPack<eInstructionSet::eIS_Avx2, T,
84  typename boost::enable_if_c<sizeof(T) == 2>::type
85 >::act(
86  const BasePack<ePackType::ePT_Avx, T>& in,
87  const BasePack<ePackType::ePT_Avx, ipUInt32>& packIdx,
88  T* const outBaseAddr)
89 {
90  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
91  in._val[0], packIdx._val[0], packIdx._val[1], outBaseAddr);
92  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
93  in._val[1], packIdx._val[2], packIdx._val[3], outBaseAddr);
94 }
95 
96 template <typename T>
97 IPSDK_FORCEINLINE
98 void
99 ScatterPack<eInstructionSet::eIS_Avx2, T,
100  typename boost::enable_if_c<sizeof(T) == 4>::type
101 >::act(
102  const BasePack<ePackType::ePT_Avx, T>& in,
103  const ipUInt32* indexes,
104  T* const outBaseAddr)
105 {
106  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
107  in._val[0], indexes, outBaseAddr);
108  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
109  in._val[1], indexes+8, outBaseAddr);
110  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
111  in._val[2], indexes+16, outBaseAddr);
112  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
113  in._val[3], indexes+24, outBaseAddr);
114 }
115 
116 template <typename T>
117 IPSDK_FORCEINLINE
118 void
119 ScatterPack<eInstructionSet::eIS_Avx2, T,
120  typename boost::enable_if_c<sizeof(T) == 4>::type
121 >::act(
122  const BasePack<ePackType::ePT_Avx, T>& in,
123  const BasePack<ePackType::ePT_Avx, ipUInt32>& packIdx,
124  T* const outBaseAddr)
125 {
126  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
127  in._val[0], packIdx._val[0], outBaseAddr);
128  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
129  in._val[1], packIdx._val[1], outBaseAddr);
130  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
131  in._val[2], packIdx._val[2], outBaseAddr);
132  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
133  in._val[3], packIdx._val[3], outBaseAddr);
134 }
135 
136 template <typename T>
137 IPSDK_FORCEINLINE
138 void
139 ScatterPack<eInstructionSet::eIS_Avx2, T,
140  typename boost::enable_if_c<sizeof(T) == 8>::type
141 >::act(
142  const BasePack<ePackType::ePT_Avx, T>& in,
143  const ipUInt32* indexes,
144  T* const outBaseAddr)
145 {
146  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
147  in._val[0], indexes, outBaseAddr);
148  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
149  in._val[1], indexes+4, outBaseAddr);
150  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
151  in._val[2], indexes+8, outBaseAddr);
152  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
153  in._val[3], indexes+12, outBaseAddr);
154  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
155  in._val[4], indexes+16, outBaseAddr);
156  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
157  in._val[5], indexes+20, outBaseAddr);
158  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
159  in._val[6], indexes+24, outBaseAddr);
160  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
161  in._val[7], indexes+28, outBaseAddr);
162 }
163 
164 template <typename T>
165 IPSDK_FORCEINLINE
166 void
167 ScatterPack<eInstructionSet::eIS_Avx2, T,
168  typename boost::enable_if_c<sizeof(T) == 8>::type
169 >::act(
170  const BasePack<ePackType::ePT_Avx, T>& in,
171  const BasePack<ePackType::ePT_Avx, ipUInt32>& packIdx,
172  T* const outBaseAddr)
173 {
174  ipUInt32 indexes[32];
175  UnloadPack<eInstructionSet::eIS_Avx2, ipUInt32>::act(packIdx, indexes);
176 
177  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
178  in._val[0], indexes, outBaseAddr);
179  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
180  in._val[1], indexes+4, outBaseAddr);
181  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
182  in._val[2], indexes+8, outBaseAddr);
183  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
184  in._val[3], indexes+12, outBaseAddr);
185  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
186  in._val[4], indexes+16, outBaseAddr);
187  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
188  in._val[5], indexes+20, outBaseAddr);
189  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
190  in._val[6], indexes+24, outBaseAddr);
191  ScatterReg<eInstructionSet::eIS_Avx2, T>::act(
192  in._val[7], indexes+28, outBaseAddr);
193 }
194 
197 
198 } // end of namespace detail
199 } // end of namespace simd
200 } // end of namespace ipsdk
201 
202 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_SCATTERPACKIMPL_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Advanced Vector Extensions 2.
Definition: InstructionSetTypes.h:48
Definition of import/export macro for library.
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53