IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
GatherPackImpl.h
Go to the documentation of this file.
1 // GatherPackImpl.h:
3 // ------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_SSE2_GATHERPACKIMPL_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_SSE2_GATHERPACKIMPL_H__
17 
23 
24 namespace ipsdk {
25 namespace simd {
26 namespace detail {
27 
30 
31 template <typename T>
32 IPSDK_FORCEINLINE
33 void
34 GatherPack<eInstructionSet::eIS_Sse2, T,
35  typename boost::enable_if_c<sizeof(T) == 1>::type
36 >::act(
37  const T* baseAddress,
38  const ipUInt32* indexes,
39  BasePack<ePackType::ePT_Sse, T>& out)
40 {
41  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
42  baseAddress,
43  indexes,
44  out._val[0]);
45 }
46 
47 template <typename T>
48 IPSDK_FORCEINLINE
49 void
50 GatherPack<eInstructionSet::eIS_Sse2, T,
51  typename boost::enable_if_c<sizeof(T) == 1>::type
52 >::act(
53  const T* baseAddress,
54  const BasePack<ePackType::ePT_Sse, ipUInt32>& packIdx,
55  BasePack<ePackType::ePT_Sse, T>& out)
56 {
57  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
58  baseAddress,
59  packIdx._val[0], packIdx._val[1],
60  packIdx._val[2], packIdx._val[3],
61  out._val[0]);
62 }
63 
64 template <typename T>
65 IPSDK_FORCEINLINE
66 void
67 GatherPack<eInstructionSet::eIS_Sse2, T,
68  typename boost::enable_if_c<sizeof(T) == 2>::type
69 >::act(
70  const T* baseAddress,
71  const ipUInt32* indexes,
72  BasePack<ePackType::ePT_Sse, T>& out)
73 {
74  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
75  baseAddress, indexes, out._val[0]);
76  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
77  baseAddress, indexes+8, out._val[1]);
78 }
79 
80 template <typename T>
81 IPSDK_FORCEINLINE
82 void
83 GatherPack<eInstructionSet::eIS_Sse2, T,
84  typename boost::enable_if_c<sizeof(T) == 2>::type
85 >::act(
86  const T* baseAddress,
87  const BasePack<ePackType::ePT_Sse, ipUInt32>& packIdx,
88  BasePack<ePackType::ePT_Sse, T>& out)
89 {
90  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
91  baseAddress, packIdx._val[0], packIdx._val[1], out._val[0]);
92  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
93  baseAddress, packIdx._val[2], packIdx._val[3], out._val[1]);
94 }
95 
96 template <typename T>
97 IPSDK_FORCEINLINE
98 void
99 GatherPack<eInstructionSet::eIS_Sse2, T,
100  typename boost::enable_if_c<sizeof(T) == 4>::type
101 >::act(
102  const T* baseAddress,
103  const ipUInt32* indexes,
104  BasePack<ePackType::ePT_Sse, T>& out)
105 {
106  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
107  baseAddress, indexes, out._val[0]);
108  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
109  baseAddress, indexes+4, out._val[1]);
110  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
111  baseAddress, indexes+8, out._val[2]);
112  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
113  baseAddress, indexes+12, out._val[3]);
114 }
115 
116 template <typename T>
117 IPSDK_FORCEINLINE
118 void
119 GatherPack<eInstructionSet::eIS_Sse2, T,
120  typename boost::enable_if_c<sizeof(T) == 4>::type
121 >::act(
122  const T* baseAddress,
123  const BasePack<ePackType::ePT_Sse, ipUInt32>& packIdx,
124  BasePack<ePackType::ePT_Sse, T>& out)
125 {
126  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
127  baseAddress, packIdx._val[0], out._val[0]);
128  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
129  baseAddress, packIdx._val[1], out._val[1]);
130  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
131  baseAddress, packIdx._val[2], out._val[2]);
132  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
133  baseAddress, packIdx._val[3], out._val[3]);
134 }
135 
136 template <typename T>
137 IPSDK_FORCEINLINE
138 void
139 GatherPack<eInstructionSet::eIS_Sse2, T,
140  typename boost::enable_if_c<sizeof(T) == 8>::type
141 >::act(
142  const T* baseAddress,
143  const ipUInt32* indexes,
144  BasePack<ePackType::ePT_Sse, T>& out)
145 {
146  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
147  baseAddress, indexes, out._val[0]);
148  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
149  baseAddress, indexes+2, out._val[1]);
150  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
151  baseAddress, indexes+4, out._val[2]);
152  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
153  baseAddress, indexes+6, out._val[3]);
154  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
155  baseAddress, indexes+8, out._val[4]);
156  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
157  baseAddress, indexes+10, out._val[5]);
158  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
159  baseAddress, indexes+12, out._val[6]);
160  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
161  baseAddress, indexes+14, out._val[7]);
162 }
163 
164 template <typename T>
165 IPSDK_FORCEINLINE
166 void
167 GatherPack<eInstructionSet::eIS_Sse2, T,
168  typename boost::enable_if_c<sizeof(T) == 8>::type
169 >::act(
170  const T* baseAddress,
171  const BasePack<ePackType::ePT_Sse, ipUInt32>& packIdx,
172  BasePack<ePackType::ePT_Sse, T>& out)
173 {
174  ipUInt32 indexes[16];
175  UnloadPack<eInstructionSet::eIS_Sse2, ipUInt32>::act(packIdx, indexes);
176 
177  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
178  baseAddress, indexes, out._val[0]);
179  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
180  baseAddress, indexes+2, out._val[1]);
181  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
182  baseAddress, indexes+4, out._val[2]);
183  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
184  baseAddress, indexes+6, out._val[3]);
185  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
186  baseAddress, indexes+8, out._val[4]);
187  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
188  baseAddress, indexes+10, out._val[5]);
189  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
190  baseAddress, indexes+12, out._val[6]);
191  GatherReg<eInstructionSet::eIS_Sse2, T>::act(
192  baseAddress, indexes+14, out._val[7]);
193 }
194 
197 
198 } // end of namespace detail
199 } // end of namespace simd
200 } // end of namespace ipsdk
201 
202 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_SSE2_GATHERPACKIMPL_H__
Defines the IPSDK_FORCEINLINE macro.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Definition of import/export macro for library.
Streaming SIMD Extensions 2.
Definition: InstructionSetTypes.h:36
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53