IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
GatherPackImpl.h
Go to the documentation of this file.
1 // GatherPackImpl.h:
3 // ------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_GATHERPACKIMPL_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_GATHERPACKIMPL_H__
17 
22 
23 namespace ipsdk {
24 namespace simd {
25 namespace detail {
26 
29 
30 template <typename T>
31 IPSDK_FORCEINLINE
32 void
33 GatherPack<eInstructionSet::eIS_Avx512, T,
34  typename boost::enable_if_c<sizeof(T) == 1>::type
35 >::act(
36  const T* baseAddress,
37  const ipUInt32* indexes,
38  BasePack<ePackType::ePT_Avx512, T>& out)
39 {
40  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
41  baseAddress,
42  indexes,
43  out._val[0]);
44 }
45 
46 template <typename T>
47 IPSDK_FORCEINLINE
48 void
49 GatherPack<eInstructionSet::eIS_Avx512, T,
50  typename boost::enable_if_c<sizeof(T) == 1>::type
51 >::act(
52  const T* baseAddress,
53  const BasePack<ePackType::ePT_Avx512, ipUInt32>& packIdx,
54  BasePack<ePackType::ePT_Avx512, T>& out)
55 {
56  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
57  baseAddress,
58  packIdx._val[0], packIdx._val[1],
59  packIdx._val[2], packIdx._val[3],
60  out._val[0]);
61 }
62 
63 template <typename T>
64 IPSDK_FORCEINLINE
65 void
66 GatherPack<eInstructionSet::eIS_Avx512, T,
67  typename boost::enable_if_c<sizeof(T) == 2>::type
68 >::act(
69  const T* baseAddress,
70  const ipUInt32* indexes,
71  BasePack<ePackType::ePT_Avx512, T>& out)
72 {
73  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
74  baseAddress, indexes, out._val[0]);
75  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
76  baseAddress, indexes+32, out._val[1]);
77 }
78 
79 template <typename T>
80 IPSDK_FORCEINLINE
81 void
82 GatherPack<eInstructionSet::eIS_Avx512, T,
83  typename boost::enable_if_c<sizeof(T) == 2>::type
84 >::act(
85  const T* baseAddress,
86  const BasePack<ePackType::ePT_Avx512, ipUInt32>& packIdx,
87  BasePack<ePackType::ePT_Avx512, T>& out)
88 {
89  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
90  baseAddress, packIdx._val[0], packIdx._val[1], out._val[0]);
91  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
92  baseAddress, packIdx._val[2], packIdx._val[3], out._val[1]);
93 }
94 
95 template <typename T>
96 IPSDK_FORCEINLINE
97 void
98 GatherPack<eInstructionSet::eIS_Avx512, T,
99  typename boost::enable_if_c<sizeof(T) == 4>::type
100 >::act(
101  const T* baseAddress,
102  const ipUInt32* indexes,
103  BasePack<ePackType::ePT_Avx512, T>& out)
104 {
105  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
106  baseAddress, indexes, out._val[0]);
107  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
108  baseAddress, indexes+16, out._val[1]);
109  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
110  baseAddress, indexes+32, out._val[2]);
111  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
112  baseAddress, indexes+48, out._val[3]);
113 }
114 
115 template <typename T>
116 IPSDK_FORCEINLINE
117 void
118 GatherPack<eInstructionSet::eIS_Avx512, T,
119  typename boost::enable_if_c<sizeof(T) == 4>::type
120 >::act(
121  const T* baseAddress,
122  const BasePack<ePackType::ePT_Avx512, ipUInt32>& packIdx,
123  BasePack<ePackType::ePT_Avx512, T>& out)
124 {
125  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
126  baseAddress, packIdx._val[0], out._val[0]);
127  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
128  baseAddress, packIdx._val[1], out._val[1]);
129  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
130  baseAddress, packIdx._val[2], out._val[2]);
131  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
132  baseAddress, packIdx._val[3], out._val[3]);
133 }
134 
135 template <typename T>
136 IPSDK_FORCEINLINE
137 void
138 GatherPack<eInstructionSet::eIS_Avx512, T,
139  typename boost::enable_if_c<sizeof(T) == 8>::type
140 >::act(
141  const T* baseAddress,
142  const ipUInt32* indexes,
143  BasePack<ePackType::ePT_Avx512, T>& out)
144 {
145  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
146  baseAddress, indexes, out._val[0]);
147  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
148  baseAddress, indexes+8, out._val[1]);
149  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
150  baseAddress, indexes+16, out._val[2]);
151  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
152  baseAddress, indexes+24, out._val[3]);
153  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
154  baseAddress, indexes+32, out._val[4]);
155  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
156  baseAddress, indexes+40, out._val[5]);
157  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
158  baseAddress, indexes+48, out._val[6]);
159  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
160  baseAddress, indexes+56, out._val[7]);
161 }
162 
163 template <typename T>
164 IPSDK_FORCEINLINE
165 void
166 GatherPack<eInstructionSet::eIS_Avx512, T,
167  typename boost::enable_if_c<sizeof(T) == 8>::type
168 >::act(
169  const T* baseAddress,
170  const BasePack<ePackType::ePT_Avx512, ipUInt32>& packIdx,
171  BasePack<ePackType::ePT_Avx512, T>& out)
172 {
173  const __m512i duplicateHiMask = _mm512_setr_epi32(
174  8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
175 
176  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
177  baseAddress, _mm512_castsi512_si256(packIdx._val[0]), out._val[0]);
178  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
179  baseAddress,
180  _mm512_castsi512_si256(_mm512_permutex2var_epi32(
181  packIdx._val[0], duplicateHiMask, packIdx._val[0])),
182  out._val[1]);
183  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
184  baseAddress, _mm512_castsi512_si256(packIdx._val[1]), out._val[2]);
185  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
186  baseAddress,
187  _mm512_castsi512_si256(_mm512_permutex2var_epi32(
188  packIdx._val[1], duplicateHiMask, packIdx._val[1])),
189  out._val[3]);
190  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
191  baseAddress, _mm512_castsi512_si256(packIdx._val[2]), out._val[4]);
192  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
193  baseAddress,
194  _mm512_castsi512_si256(_mm512_permutex2var_epi32(
195  packIdx._val[2], duplicateHiMask, packIdx._val[2])),
196  out._val[5]);
197  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
198  baseAddress, _mm512_castsi512_si256(packIdx._val[3]), out._val[6]);
199  GatherReg<eInstructionSet::eIS_Avx512, T>::act(
200  baseAddress,
201  _mm512_castsi512_si256(_mm512_permutex2var_epi32(
202  packIdx._val[3], duplicateHiMask, packIdx._val[3])),
203  out._val[7]);
204 }
205 
208 
209 } // end of namespace detail
210 } // end of namespace simd
211 } // end of namespace ipsdk
212 
213 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_GATHERPACKIMPL_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
(including foundation and byte and word instructions)
Definition: InstructionSetTypes.h:51
Definition of import/export macro for library.
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53