IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
CastReg.h
Go to the documentation of this file.
1 // CastReg.h:
3 // ------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_CASTREG_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_CASTREG_H__
17 
19 
26 
27 #include <boost/mpl/and.hpp>
28 #include <boost/mpl/or.hpp>
29 #include <boost/mpl/sizeof.hpp>
30 
31 #include "immintrin.h"
32 
33 namespace ipsdk {
34 namespace simd {
35 namespace detail {
36 
39 
// CastReg specialization for eInstructionSet::eIS_Avx512 that is enabled when
// TIn and TOut are the same type, or when both are integral types with the
// same sizeof. Two act() overloads are provided.
42 template <typename TIn, typename TOut>
43 struct CastReg<eInstructionSet::eIS_Avx512, TIn, TOut,
44  typename boost::enable_if<
45  typename boost::mpl::or_<
46  typename boost::is_same<TIn, TOut>::type,
47  typename boost::mpl::and_<
48  typename boost::is_integral<TIn>::type,
49  typename boost::mpl::and_<
50  typename boost::is_integral<TOut>::type,
51  typename boost::mpl::equal_to<
52  boost::mpl::sizeof_<TIn>,
53  boost::mpl::sizeof_<TOut>
54  >::type
55  >::type
56  >::type
57  >::type
58  >::type
59 >
60 {
 // Register overload: `out` receives `in` through a plain assignment; no
 // conversion intrinsic is involved.
61  static IPSDK_FORCEINLINE
62  void act(const typename Avx512Type<TIn>::Type& in,
63  typename Avx512Type<TOut>::Type& out)
64  {
65  out = in;
66  }
67 
 // Mask overload: takes an Avx512MaskType<TIn>::Type input and writes a
 // regular Avx512Type<TOut>::Type register.
 // NOTE(review): listing lines 72, 74 and 75 are missing here, so the call
 // that consumes `in` is not visible -- check the original header.
68  static IPSDK_FORCEINLINE
69  void act(const typename Avx512MaskType<TIn>::Type& in,
70  typename Avx512Type<TOut>::Type& out)
71  {
73  in,
76  }
77 };
78 
// Widening cast from a register of ipUInt8 values to two registers of a
// 2-byte TOut (enabled when sizeof(TOut) == 2).
// NOTE(review): listing line 82 (presumably the `struct CastReg<...>` header)
// is missing from this listing.
81 template <typename TOut>
83  typename boost::enable_if<
84  typename boost::mpl::equal_to<
85  boost::mpl::int_<sizeof(TOut)>,
86  boost::mpl::int_<2>
87  >::type
88  >::type
89 >
90 {
 // in   : 64 unsigned bytes
 // outl : zero-extended 16-bit values built from the low half of `in`
 // outh : zero-extended 16-bit values built from the high half of `in`
91  static IPSDK_FORCEINLINE
92  void act(const Avx512Type<ipUInt8>::Type& in,
93  typename Avx512Type<TOut>::Type& outl,
94  typename Avx512Type<TOut>::Type& outh)
95  {
 // _mm512_set_epi64 lists elements from highest to lowest, so the permuted
 // vector holds source qwords 0,4,1,5,2,6,3,7. Each 128-bit lane then pairs
 // qword k with qword k+4, which lets the per-lane unpacklo/unpackhi below
 // emit the input bytes in their original order.
96  __m512i inPermuted = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 3, 6, 2, 5, 1, 4, 0), in);
 // Interleaving with zero bytes puts 0x00 in the upper byte of every 16-bit
 // element, i.e. zero extension.
97  outl = _mm512_unpacklo_epi8(inPermuted, _mm512_set1_epi8(0));
98  outh = _mm512_unpackhi_epi8(inPermuted, _mm512_set1_epi8(0));
99  }
100 };
101 
// Widening cast from a register of ipInt8 values to two registers of a
// 2-byte TOut (enabled when sizeof(TOut) == 2).
// NOTE(review): listing line 105 (presumably the `struct CastReg<...>` header)
// is missing from this listing.
104 template <typename TOut>
106  typename boost::enable_if_c<sizeof(TOut)==2>::type>
107 {
 // in   : 64 signed bytes
 // outl : sign-extended 16-bit values built from the low half of `in`
 // outh : sign-extended 16-bit values built from the high half of `in`
108  static IPSDK_FORCEINLINE
109  void act(const Avx512Type<ipInt8>::Type& in,
110  typename Avx512Type<TOut>::Type& outl,
111  typename Avx512Type<TOut>::Type& outh)
112  {
 // Reorder qwords to 0,4,1,5,2,6,3,7 so the per-128-bit-lane unpacks keep
 // the original element order.
113  __m512i inPermuted = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 3, 6, 2, 5, 1, 4, 0), in);
 // Unpacking the vector with itself duplicates each byte into both halves
 // of a 16-bit element; the arithmetic right shift by 8 then keeps the
 // upper copy and replicates its sign bit.
114  outl = _mm512_unpacklo_epi8(inPermuted, inPermuted);
115  outl = _mm512_srai_epi16(outl, 8);
116  outh = _mm512_unpackhi_epi8(inPermuted, inPermuted);
117  outh = _mm512_srai_epi16(outh, 8);
118  }
119 };
120 
// Full specialization converting a register of ipInt32 values with
// _mm512_cvtepi32_ps (signed 32-bit integers to single-precision floats).
// NOTE(review): listing lines 124 (presumably the `struct CastReg<...>`
// header) and 128 (the declaration of `out`) are missing from this listing.
123 template <>
125 {
126  static IPSDK_FORCEINLINE
127  void act(const Avx512Type<ipInt32>::Type& in,
129  {
130  out = _mm512_cvtepi32_ps(in);
131  }
132 };
133 
// Full specialization converting a register of ipUInt32 values with
// _mm512_cvtepu32_ps (unsigned 32-bit integers to single-precision floats).
// NOTE(review): listing lines 137 (presumably the `struct CastReg<...>`
// header) and 141 (the declaration of `out`) are missing from this listing.
136 template <>
138 {
139  static IPSDK_FORCEINLINE
140  void act(const Avx512Type<ipUInt32>::Type& in,
142  {
143  out = _mm512_cvtepu32_ps(in);
144  }
145 };
146 
// Full specialization converting a register of ipReal32 values with
// _mm512_cvttps_epi32 (single-precision floats to signed 32-bit integers,
// truncating toward zero).
// NOTE(review): listing lines 150 (presumably the `struct CastReg<...>`
// header) and 154 (the declaration of `out`) are missing from this listing.
149 template <>
151 {
152  static IPSDK_FORCEINLINE
153  void act(const Avx512Type<ipReal32>::Type& in,
155  {
156  out = _mm512_cvttps_epi32(in);
157  }
158 };
159 
// Full specialization converting a register of ipReal32 values with
// _mm512_cvttps_epu32 (single-precision floats to unsigned 32-bit integers,
// truncating toward zero).
// NOTE(review): listing lines 163 (presumably the `struct CastReg<...>`
// header) and 167 (the declaration of `out`) are missing from this listing.
162 template <>
164 {
165  static IPSDK_FORCEINLINE
166  void act(const Avx512Type<ipReal32>::Type& in,
168  {
169  out = _mm512_cvttps_epu32(in);
170  }
171 };
172 
// Full specialization widening a register of ipInt32 values into two
// registers of doubles via _mm512_cvtepi32_pd.
// NOTE(review): listing lines 176 (presumably the `struct CastReg<...>`
// header) and 180-181 (the declarations of `outl`/`outh`) are missing from
// this listing.
175 template <>
177 {
178  static IPSDK_FORCEINLINE
179  void act(const Avx512Type<ipInt32>::Type& in,
182  {
 // Low 256 bits of `in` (a cast, no data movement) -> outl.
183  outl = _mm512_cvtepi32_pd(_mm512_castsi512_si256(in));
 // High 256 bits of `in` (extract index 1) -> outh.
184  outh = _mm512_cvtepi32_pd(_mm512_extracti64x4_epi64(in, 1));
185  }
186 };
187 
// Full specialization widening a register of ipUInt32 values into two
// registers of doubles via _mm512_cvtepu32_pd.
// NOTE(review): listing lines 191 (presumably the `struct CastReg<...>`
// header) and 195-196 (the declarations of `outl`/`outh`) are missing from
// this listing.
190 template <>
192 {
193  static IPSDK_FORCEINLINE
194  void act(const Avx512Type<ipUInt32>::Type& in,
197  {
 // Low 256 bits of `in` (a cast, no data movement) -> outl.
198  outl = _mm512_cvtepu32_pd(_mm512_castsi512_si256(in));
 // High 256 bits of `in` (extract index 1) -> outh.
199  outh = _mm512_cvtepu32_pd(_mm512_extracti64x4_epi64(in, 1));
200  }
201 };
202 
// Full specialization narrowing two registers of ipReal64 values into one
// register of signed 32-bit integers, using the truncating conversion
// _mm512_cvttpd_epi32 on each input.
// NOTE(review): listing lines 206 (presumably the `struct CastReg<...>`
// header) and 211 (the declaration of `out`) are missing from this listing.
205 template <>
207 {
208  static IPSDK_FORCEINLINE
209  void act(const Avx512Type<ipReal64>::Type& inl,
210  const Avx512Type<ipReal64>::Type& inh,
212  {
 // Each conversion yields a 256-bit result; these locals are named like the
 // outputs of sibling casts but are plain temporaries here.
213  const __m256i outl = _mm512_cvttpd_epi32(inl);
214  const __m256i outh = _mm512_cvttpd_epi32(inh);
 // Converted `inl` goes to the lower 256 bits, converted `inh` to the upper.
 // NOTE(review): the first insert passes `out` as its source operand before
 // this function has written it. Both halves are overwritten, so the result
 // does not depend on the previous value, but if callers pass an
 // uninitialized register this is an uninitialized read; starting from
 // _mm512_castsi256_si512(outl) would avoid it -- confirm against callers.
215  out = _mm512_inserti64x4(out, outl, 0);
216  out = _mm512_inserti64x4(out, outh, 1);
217  }
218 };
219 
// Full specialization narrowing two registers of ipReal64 values into one
// register of unsigned 32-bit integers, using the truncating conversion
// _mm512_cvttpd_epu32 on each input.
// NOTE(review): listing lines 223 (presumably the `struct CastReg<...>`
// header) and 228 (the declaration of `out`) are missing from this listing.
222 template <>
224 {
225  static IPSDK_FORCEINLINE
226  void act(const Avx512Type<ipReal64>::Type& inl,
227  const Avx512Type<ipReal64>::Type& inh,
229  {
 // Each conversion yields a 256-bit result held in a local temporary.
230  const __m256i outl = _mm512_cvttpd_epu32(inl);
231  const __m256i outh = _mm512_cvttpd_epu32(inh);
 // Converted `inl` goes to the lower 256 bits, converted `inh` to the upper.
 // NOTE(review): the first insert passes `out` as its source operand before
 // this function has written it. Both halves are overwritten, so the result
 // does not depend on the previous value, but if callers pass an
 // uninitialized register this is an uninitialized read; starting from
 // _mm512_castsi256_si512(outl) would avoid it -- confirm against callers.
232  out = _mm512_inserti64x4(out, outl, 0);
233  out = _mm512_inserti64x4(out, outh, 1);
234  }
235 };
236 
// Widening cast from a register of ipInt32 values to two registers of an
// 8-byte integral TOut (enabled when sizeof(TOut) == 8 and TOut is integral).
// NOTE(review): listing line 240 (presumably the `struct CastReg<...>` header)
// and lines 252-255 are missing from this listing.
239 template <typename TOut>
241  typename boost::enable_if_c<sizeof(TOut)==8 &&
242  boost::is_integral<TOut>::value>::type>
243 {
244  static IPSDK_FORCEINLINE
245  void act(const Avx512Type<ipInt32>::Type& in,
246  typename Avx512Type<TOut>::Type& outl,
247  typename Avx512Type<TOut>::Type& outh)
248  {
 // Reorder qwords to 0,4,1,5,2,6,3,7 so the per-128-bit-lane unpacks keep
 // the original element order.
249  __m512i inPermuted = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 3, 6, 2, 5, 1, 4, 0), in);
250 
251  const Avx512Type<ipInt32>::Type zero = _mm512_setzero_si512();
 // `hi` supplies the upper 32 bits of every 64-bit output element. Its
 // definition is in the missing lines 252-255; presumably it is the sign
 // extension word derived from `inPermuted` and `zero` -- TODO confirm.
256  outl = _mm512_unpacklo_epi32(inPermuted, hi);
257  outh = _mm512_unpackhi_epi32(inPermuted, hi);
258  }
259 };
260 
// Widening cast from a register of ipUInt32 values to two registers of an
// 8-byte integral TOut (enabled when sizeof(TOut) == 8 and TOut is integral).
// NOTE(review): listing line 264 (presumably the `struct CastReg<...>` header)
// is missing from this listing.
263 template <typename TOut>
265  typename boost::enable_if_c<sizeof(TOut)==8 &&
266  boost::is_integral<TOut>::value>::type>
267 {
268  static IPSDK_FORCEINLINE
269  void act(const Avx512Type<ipUInt32>::Type& in,
270  typename Avx512Type<TOut>::Type& outl,
271  typename Avx512Type<TOut>::Type& outh)
272  {
 // Reorder qwords to 0,4,1,5,2,6,3,7 so the per-128-bit-lane unpacks keep
 // the original element order.
273  __m512i inPermuted = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 3, 6, 2, 5, 1, 4, 0), in);
 // Interleaving with zero dwords clears the upper 32 bits of every 64-bit
 // element, i.e. zero extension.
274  const Avx512Type<ipUInt32>::Type zero = _mm512_setzero_si512();
275  outl = _mm512_unpacklo_epi32(inPermuted, zero);
276  outh = _mm512_unpackhi_epi32(inPermuted, zero);
277  }
278 };
279 
// Widening cast from a register of ipInt16 values to two registers of a
// 4-byte integral TOut (enabled when sizeof(TOut) == 4 and TOut is integral).
// NOTE(review): listing line 283 (presumably the `struct CastReg<...>` header)
// is missing from this listing.
282 template <typename TOut>
284  typename boost::enable_if_c<sizeof(TOut)==4
285  && boost::is_integral<TOut>::value>::type>
286 {
287  static IPSDK_FORCEINLINE
288  void act(const Avx512Type<ipInt16>::Type& in,
289  typename Avx512Type<TOut>::Type& outl,
290  typename Avx512Type<TOut>::Type& outh)
291  {
 // Reorder qwords to 0,4,1,5,2,6,3,7 so the per-128-bit-lane unpacks keep
 // the original element order.
292  __m512i inPermuted = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 3, 6, 2, 5, 1, 4, 0), in);
293 
 // Unpacking the vector with itself duplicates each 16-bit value into both
 // halves of a 32-bit element; the arithmetic right shift by 16 keeps the
 // upper copy and replicates its sign bit.
294  outl = _mm512_unpacklo_epi16(inPermuted, inPermuted);
295  outh = _mm512_unpackhi_epi16(inPermuted, inPermuted);
296  outl = _mm512_srai_epi32(outl, 16);
297  outh = _mm512_srai_epi32(outh, 16);
298  }
299 };
300 
// Widening cast from a register of ipUInt16 values to two registers of a
// 4-byte integral TOut (enabled when sizeof(TOut) == 4 and TOut is integral).
// NOTE(review): listing line 304 (presumably the `struct CastReg<...>` header)
// is missing from this listing.
303 template <typename TOut>
305  typename boost::enable_if_c<sizeof(TOut)==4 &&
306  boost::is_integral<TOut>::value>::type>
307 {
308  static IPSDK_FORCEINLINE
309  void act(const Avx512Type<ipUInt16>::Type& in,
310  typename Avx512Type<TOut>::Type& outl,
311  typename Avx512Type<TOut>::Type& outh)
312  {
 // Reorder qwords to 0,4,1,5,2,6,3,7 so the per-128-bit-lane unpacks keep
 // the original element order.
313  __m512i inPermuted = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 3, 6, 2, 5, 1, 4, 0), in);
314 
 // Interleaving with zero words clears the upper 16 bits of every 32-bit
 // element, i.e. zero extension.
315  outl = _mm512_unpacklo_epi16(inPermuted, _mm512_set1_epi16(0));
316  outh = _mm512_unpackhi_epi16(inPermuted, _mm512_set1_epi16(0));
317  }
318 };
319 
// Cast from a register of a 2-byte integral TIn (enabled when
// sizeof(TIn) == 2 and TIn is integral).
// NOTE(review): listing lines 323 (presumably the `struct CastReg<...>`
// header), 329-330 (the remaining parameters) and 333-335 (the body
// statements) are missing from this listing, so the target type is not
// visible. Only the two ipInt32 temporaries remain, which suggests the cast
// goes through a 32-bit integer intermediate -- TODO confirm against the
// original header.
322 template <typename TIn>
324  typename boost::enable_if_c<sizeof(TIn)==2 &&
325  boost::is_integral<TIn>::value>::type>
326 {
327  static IPSDK_FORCEINLINE
328  void act(const typename Avx512Type<TIn>::Type& in,
331  {
332  Avx512Type<ipInt32>::Type in32l, in32h;
336  }
337 };
338 
// Full specialization widening a register of ipReal32 values into two
// registers of doubles via _mm512_cvtps_pd.
// NOTE(review): listing lines 342 (presumably the `struct CastReg<...>`
// header) and 346-347 (the declarations of `outl`/`outh`) are missing from
// this listing.
341 template <>
343 {
344  static IPSDK_FORCEINLINE
345  void act(const Avx512Type<ipReal32>::Type& in,
348  {
 // Low 256 bits of `in` (a cast, no data movement) -> outl.
349  outl = _mm512_cvtps_pd(_mm512_castps512_ps256(in));
 // Shuffle control 0x4E selects 128-bit lanes 2,3,0,1, i.e. it swaps the two
 // 256-bit halves, so the cast below picks up the original high half.
350  outh = _mm512_cvtps_pd(_mm512_castps512_ps256(_mm512_shuffle_f32x4(in, in, 0x4E)));
351  }
352 };
353 
// Narrowing cast from two registers of a 2-byte TIn into one register of
// bytes, using the unsigned-saturating pack _mm512_packus_epi16.
// NOTE(review): listing lines 358 (presumably the `struct CastReg<...>`
// header) and 365 (the declaration of `out`) are missing from this listing.
// NOTE(review): the enable_if only checks sizeof(TIn) == 2, while
// _mm512_packus_epi16 interprets its sources as signed 16-bit values. If TIn
// can be an unsigned 16-bit type, inputs above 32767 would saturate to 0
// instead of 255 -- confirm the intended TIn set.
357 template <typename TIn>
359  typename boost::enable_if_c<sizeof(TIn)==2>::type
360 >
361 {
362  static IPSDK_FORCEINLINE
363  void act(const typename Avx512Type<TIn>::Type& inl,
364  const typename Avx512Type<TIn>::Type& inh,
366  {
 // The pack works per 128-bit lane, so each lane ends up holding 8 bytes
 // from `inl` followed by 8 bytes from `inh`.
367  out = _mm512_packus_epi16(inl, inh);
 // Gather the even qwords (from `inl`) into the low half and the odd qwords
 // (from `inh`) into the high half to restore element order.
368  out = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), out);
369  }
370 };
371 
// Narrowing cast from two registers of a 2-byte TIn into one register of
// bytes, using the signed-saturating pack _mm512_packs_epi16.
// NOTE(review): listing lines 375 (presumably the `struct CastReg<...>`
// header) and 381 (the declaration of `out`) are missing from this listing.
374 template <typename TIn>
376  typename boost::enable_if_c<sizeof(TIn)==2>::type>
377 {
378  static IPSDK_FORCEINLINE
379  void act(const typename Avx512Type<TIn>::Type& inl,
380  const typename Avx512Type<TIn>::Type& inh,
382  {
 // The pack works per 128-bit lane, so each lane ends up holding 8 bytes
 // from `inl` followed by 8 bytes from `inh`.
383  out = _mm512_packs_epi16(inl, inh);
 // Gather the even qwords (from `inl`) into the low half and the odd qwords
 // (from `inh`) into the high half to restore element order.
384  out = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), out);
385  }
386 };
387 
// Narrowing cast from two registers of a 4-byte integral TIn into one
// register of 16-bit values, using the signed-saturating pack
// _mm512_packs_epi32.
// NOTE(review): listing lines 391 (presumably the `struct CastReg<...>`
// header) and 397 (the declaration of `out`) are missing from this listing.
390 template <typename TIn>
392  typename boost::enable_if_c<sizeof(TIn)==4 && boost::is_integral<TIn>::value>::type>
393 {
394  static IPSDK_FORCEINLINE
395  void act(const typename Avx512Type<TIn>::Type& inl,
396  const typename Avx512Type<TIn>::Type& inh,
398  {
 // The pack works per 128-bit lane, so each lane ends up holding four
 // 16-bit values from `inl` followed by four from `inh`.
399  out = _mm512_packs_epi32(inl, inh);
 // Gather the even qwords (from `inl`) into the low half and the odd qwords
 // (from `inh`) into the high half to restore element order.
400  out = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), out);
401  }
402 };
403 
// Narrowing cast from two registers of a 4-byte integral TIn into one
// register of 16-bit values, using the unsigned-saturating pack
// _mm512_packus_epi32.
// NOTE(review): listing lines 407 (presumably the `struct CastReg<...>`
// header) and 413 (the declaration of `out`) are missing from this listing.
// NOTE(review): _mm512_packus_epi32 interprets its sources as signed 32-bit
// values; if TIn can be an unsigned 32-bit type, inputs above 0x7FFFFFFF
// would saturate to 0 rather than 65535 -- confirm the intended TIn set.
406 template <typename TIn>
408  typename boost::enable_if_c<sizeof(TIn)==4 && boost::is_integral<TIn>::value>::type>
409 {
410  static IPSDK_FORCEINLINE
411  void act(const typename Avx512Type<TIn>::Type& inl,
412  const typename Avx512Type<TIn>::Type& inh,
414  {
 // The pack works per 128-bit lane, so each lane ends up holding four
 // 16-bit values from `inl` followed by four from `inh`.
415  out = _mm512_packus_epi32(inl, inh);
 // Gather the even qwords (from `inl`) into the low half and the odd qwords
 // (from `inh`) into the high half to restore element order.
416  out = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), out);
417  }
418 };
419 
// Narrowing cast from two registers of ipReal32 values into one register of
// a 2-byte TOut (enabled when sizeof(TOut) == 2).
// NOTE(review): listing lines 423 (presumably the `struct CastReg<...>`
// header) and 432, 434, 436 (the callee names of the three calls below) are
// missing from this listing.
422 template <typename TOut>
424  typename boost::enable_if_c<sizeof(TOut)==2>::type>
425 {
426  static IPSDK_FORCEINLINE
427  void act(const Avx512Type<ipReal32>::Type& inl,
428  const Avx512Type<ipReal32>::Type& inh,
429  typename Avx512Type<TOut>::Type& out)
430  {
 // Two-step cast through 32-bit integer temporaries: each float register is
 // first converted into an ipInt32 register, then both integer registers
 // are combined into `out`. The callees are presumably other CastReg
 // specializations -- TODO confirm against the original header.
431  Avx512Type<ipInt32>::Type inlInt32, inhInt32;
433  inl, inlInt32);
435  inh, inhInt32);
437  inlInt32, inhInt32, out);
438  }
439 };
440 
// Full specialization narrowing two registers of ipReal64 values into one
// register of single-precision floats via _mm512_cvtpd_ps.
// NOTE(review): listing lines 444 (presumably the `struct CastReg<...>`
// header) and 449 (the declaration of `out`) are missing from this listing.
443 template <>
445 {
446  static IPSDK_FORCEINLINE
447  void act(const Avx512Type<ipReal64>::Type& inl,
448  const Avx512Type<ipReal64>::Type& inh,
450  {
451  //out = _mm512_castps256_ps512(_mm512_cvtpd_ps(inl));
452  //out = _mm512_insertf256_ps(out, _mm512_cvtpd_ps(inh), 1);
 // Each conversion yields 8 floats in a 256-bit register; the cast widens it
 // to 512 bits and leaves the upper 256 bits unspecified.
453  const Avx512Type<ipReal32>::Type inl_ps = _mm512_castps256_ps512(_mm512_cvtpd_ps(inl));
454  const Avx512Type<ipReal32>::Type inh_ps = _mm512_castps256_ps512(_mm512_cvtpd_ps(inh));
 // Control 0x44 takes 128-bit lanes 0,1 of the first operand and lanes 0,1
 // of the second, so only the defined lower halves are combined: converted
 // `inl` in the low 256 bits, converted `inh` in the high 256 bits.
455  out = _mm512_shuffle_f32x4(inl_ps, inh_ps, 0x44);
456  }
457 };
458 
461 
462 } // end of namespace detail
463 } // end of namespace simd
464 } // end of namespace ipsdk
465 
466 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_CASTREG_H__
int8_t ipInt8
Base types definition.
Definition: BaseTypes.h:48
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Definition: AllBitsToOneReg.h:33
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
int32_t ipInt32
Base types definition.
Definition: BaseTypes.h:52
(including foundation and byte and word instructions)
Definition: InstructionSetTypes.h:51
Definition: CastReg.h:30
int16_t ipInt16
Base types definition.
Definition: BaseTypes.h:50
uint8_t ipUInt8
Base types definition.
Definition: BaseTypes.h:49
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Definition: Avx512MaskTypes.h:36
Definition of import/export macro for library.
Definition: AllBitsToZeroReg.h:31
Definition: IsLessRegDecl.h:30
uint16_t ipUInt16
Base types definition.
Definition: BaseTypes.h:51
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
Definition: MaskBlendReg.h:30
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53
Structure used to retrieve the AVX512 type associated with a base type
Definition: Avx512Types.h:36