15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_CASTREG_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_CASTREG_H__ 27 #include <boost/mpl/and.hpp> 28 #include <boost/mpl/or.hpp> 29 #include <boost/mpl/sizeof.hpp> 31 #include "immintrin.h" 42 template <
typename TIn,
typename TOut>
44 typename boost::enable_if<
45 typename boost::mpl::or_<
46 typename boost::is_same<TIn, TOut>::type,
47 typename boost::mpl::and_<
48 typename boost::is_integral<TIn>::type,
49 typename boost::mpl::and_<
50 typename boost::is_integral<TOut>::type,
51 typename boost::mpl::equal_to<
52 boost::mpl::sizeof_<TIn>,
53 boost::mpl::sizeof_<TOut>
61 static IPSDK_FORCEINLINE
68 static IPSDK_FORCEINLINE
81 template <
typename TOut>
83 typename boost::enable_if<
84 typename boost::mpl::equal_to<
85 boost::mpl::int_<sizeof(TOut)>,
91 static IPSDK_FORCEINLINE
// Reorder the eight 64-bit elements of `in` to {0,4,1,5,2,6,3,7} (lowest
// element first). The unpack intrinsics below operate independently inside
// each 128-bit lane, so this pre-shuffle makes unpacklo gather source
// qwords 0..3 and unpackhi gather source qwords 4..7.
96 __m512i inPermuted = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 3, 6, 2, 5, 1, 4, 0), in);
// Interleave every byte with a zero byte: each 8-bit element becomes a
// zero-extended 16-bit element. outl receives the low 32 bytes of `in`.
97 outl = _mm512_unpacklo_epi8(inPermuted, _mm512_set1_epi8(0));
// outh receives the high 32 bytes of `in`, widened the same way.
98 outh = _mm512_unpackhi_epi8(inPermuted, _mm512_set1_epi8(0));
104 template <
typename TOut>
106 typename boost::enable_if_c<sizeof(TOut)==2>::type>
108 static IPSDK_FORCEINLINE
// Reorder the 64-bit elements of `in` to {0,4,1,5,2,6,3,7} (lowest first) so
// that the lane-local unpacks below yield the low 32 bytes of `in` in outl
// and the high 32 bytes in outh.
113 __m512i inPermuted = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 3, 6, 2, 5, 1, 4, 0), in);
// Unpacking the register with itself duplicates each byte into both halves
// of a 16-bit element; the arithmetic right shift by 8 then discards the low
// copy and replicates the sign bit, giving a sign-extended 16-bit value.
114 outl = _mm512_unpacklo_epi8(inPermuted, inPermuted);
115 outl = _mm512_srai_epi16(outl, 8);
116 outh = _mm512_unpackhi_epi8(inPermuted, inPermuted);
117 outh = _mm512_srai_epi16(outh, 8);
126 static IPSDK_FORCEINLINE
// Convert the packed signed 32-bit integers of `in` to single-precision floats.
130 out = _mm512_cvtepi32_ps(in);
139 static IPSDK_FORCEINLINE
// Convert the packed unsigned 32-bit integers of `in` to single-precision floats.
143 out = _mm512_cvtepu32_ps(in);
152 static IPSDK_FORCEINLINE
// Convert packed single-precision floats to signed 32-bit integers using
// truncation (the "tt" variant rounds toward zero).
156 out = _mm512_cvttps_epi32(in);
165 static IPSDK_FORCEINLINE
// Convert packed single-precision floats to unsigned 32-bit integers using
// truncation (the "tt" variant rounds toward zero).
169 out = _mm512_cvttps_epu32(in);
178 static IPSDK_FORCEINLINE
// A 512-bit register holds 16 int32 but only 8 doubles, so the conversion is
// split in two. The cast reinterprets the low 256 bits of `in` (elements 0..7).
183 outl = _mm512_cvtepi32_pd(_mm512_castsi512_si256(in));
// Extract the upper 256 bits of `in` (elements 8..15) and convert them.
184 outh = _mm512_cvtepi32_pd(_mm512_extracti64x4_epi64(in, 1));
193 static IPSDK_FORCEINLINE
// Unsigned counterpart of the int32 -> double widening: the low 256 bits of
// `in` (elements 0..7) are converted into outl.
198 outl = _mm512_cvtepu32_pd(_mm512_castsi512_si256(in));
// The upper 256 bits of `in` (elements 8..15) are converted into outh.
199 outh = _mm512_cvtepu32_pd(_mm512_extracti64x4_epi64(in, 1));
208 static IPSDK_FORCEINLINE
// Each 512-bit register of doubles converts (with truncation toward zero)
// to a 256-bit register of eight signed 32-bit integers.
213 const __m256i outl = _mm512_cvttpd_epi32(inl);
214 const __m256i outh = _mm512_cvttpd_epi32(inh);
// Assemble the two halves: inl results go to the low 256 bits of `out`,
// inh results to the high 256 bits.
// NOTE(review): the first insert reads `out` before this block has written
// it; both halves are overwritten here, so the final value does not depend
// on its previous contents.
215 out = _mm512_inserti64x4(out, outl, 0);
216 out = _mm512_inserti64x4(out, outh, 1);
225 static IPSDK_FORCEINLINE
// Each 512-bit register of doubles converts (with truncation toward zero)
// to a 256-bit register of eight unsigned 32-bit integers.
230 const __m256i outl = _mm512_cvttpd_epu32(inl);
231 const __m256i outh = _mm512_cvttpd_epu32(inh);
// Assemble the two halves: inl results go to the low 256 bits of `out`,
// inh results to the high 256 bits.
// NOTE(review): the first insert reads `out` before this block has written
// it; both halves are overwritten here, so the final value does not depend
// on its previous contents.
232 out = _mm512_inserti64x4(out, outl, 0);
233 out = _mm512_inserti64x4(out, outh, 1);
239 template <
typename TOut>
241 typename boost::enable_if_c<sizeof(TOut)==8 &&
242 boost::is_integral<TOut>::value>::type>
244 static IPSDK_FORCEINLINE
// Reorder the 64-bit elements of `in` to {0,4,1,5,2,6,3,7} (lowest first) so
// that the lane-local unpacks below yield the low half of `in` in outl and
// the high half in outh.
249 __m512i inPermuted = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 3, 6, 2, 5, 1, 4, 0), in);
// Build 64-bit elements whose low dword comes from inPermuted and whose high
// dword comes from the matching element of `hi`.
// NOTE(review): `hi` is computed on lines not present in this listing;
// presumably it holds the per-element sign extension - confirm.
256 outl = _mm512_unpacklo_epi32(inPermuted, hi);
257 outh = _mm512_unpackhi_epi32(inPermuted, hi);
263 template <
typename TOut>
265 typename boost::enable_if_c<sizeof(TOut)==8 &&
266 boost::is_integral<TOut>::value>::type>
268 static IPSDK_FORCEINLINE
// Reorder the 64-bit elements of `in` to {0,4,1,5,2,6,3,7} (lowest first) so
// that the lane-local unpacks below yield the low half of `in` in outl and
// the high half in outh.
273 __m512i inPermuted = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 3, 6, 2, 5, 1, 4, 0), in);
// Build 64-bit elements whose low dword comes from inPermuted and whose high
// dword comes from the matching element of `zero`.
// NOTE(review): `zero` is declared on a line not present in this listing;
// presumably an all-zero register, making this a zero-extension - confirm.
275 outl = _mm512_unpacklo_epi32(inPermuted, zero);
276 outh = _mm512_unpackhi_epi32(inPermuted, zero);
282 template <
typename TOut>
284 typename boost::enable_if_c<sizeof(TOut)==4
285 && boost::is_integral<TOut>::value>::type>
287 static IPSDK_FORCEINLINE
// Reorder the 64-bit elements of `in` to {0,4,1,5,2,6,3,7} (lowest first) so
// that the lane-local unpacks below yield the low half of `in` in outl and
// the high half in outh.
292 __m512i inPermuted = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 3, 6, 2, 5, 1, 4, 0), in);
// Unpacking the register with itself duplicates each 16-bit element into
// both halves of a 32-bit element.
294 outl = _mm512_unpacklo_epi16(inPermuted, inPermuted);
295 outh = _mm512_unpackhi_epi16(inPermuted, inPermuted);
// The arithmetic right shift by 16 drops the low copy and replicates the
// sign bit, leaving a sign-extended 32-bit value.
296 outl = _mm512_srai_epi32(outl, 16);
297 outh = _mm512_srai_epi32(outh, 16);
303 template <
typename TOut>
305 typename boost::enable_if_c<sizeof(TOut)==4 &&
306 boost::is_integral<TOut>::value>::type>
308 static IPSDK_FORCEINLINE
// Reorder the 64-bit elements of `in` to {0,4,1,5,2,6,3,7} (lowest first) so
// that the lane-local unpacks below yield the low half of `in` in outl and
// the high half in outh.
313 __m512i inPermuted = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 3, 6, 2, 5, 1, 4, 0), in);
// Interleave every 16-bit element with a zero word: each element becomes a
// zero-extended 32-bit value.
315 outl = _mm512_unpacklo_epi16(inPermuted, _mm512_set1_epi16(0));
316 outh = _mm512_unpackhi_epi16(inPermuted, _mm512_set1_epi16(0));
322 template <
typename TIn>
324 typename boost::enable_if_c<sizeof(TIn)==2 &&
325 boost::is_integral<TIn>::value>::type>
327 static IPSDK_FORCEINLINE
344 static IPSDK_FORCEINLINE
// Widen the eight floats in the low 256 bits of `in` to doubles.
349 outl = _mm512_cvtps_pd(_mm512_castps512_ps256(in));
// Shuffle immediate 0x4E selects 128-bit lanes {2,3,0,1}, i.e. it swaps the
// two 256-bit halves of `in`; the cast then picks what was the upper half,
// which is widened into outh.
350 outh = _mm512_cvtps_pd(_mm512_castps512_ps256(_mm512_shuffle_f32x4(in, in, 0x4E)));
357 template <
typename TIn>
359 typename boost::enable_if_c<sizeof(TIn)==2>::type
362 static IPSDK_FORCEINLINE
// Narrow 16-bit elements to 8 bits with unsigned saturation. The pack works
// per 128-bit lane, so the result alternates 64-bit groups from inl and inh:
// {l0,h0,l1,h1,l2,h2,l3,h3}.
367 out = _mm512_packus_epi16(inl, inh);
// Gather qwords {0,2,4,6,1,3,5,7} (lowest first) so all inl-derived bytes
// occupy the low half of `out` and all inh-derived bytes the high half.
368 out = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), out);
374 template <
typename TIn>
376 typename boost::enable_if_c<sizeof(TIn)==2>::type>
378 static IPSDK_FORCEINLINE
// Narrow 16-bit elements to 8 bits with signed saturation. The pack works
// per 128-bit lane, so the result alternates 64-bit groups from inl and inh:
// {l0,h0,l1,h1,l2,h2,l3,h3}.
383 out = _mm512_packs_epi16(inl, inh);
// Gather qwords {0,2,4,6,1,3,5,7} (lowest first) so all inl-derived bytes
// occupy the low half of `out` and all inh-derived bytes the high half.
384 out = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), out);
390 template <
typename TIn>
392 typename boost::enable_if_c<sizeof(TIn)==4 && boost::is_integral<TIn>::value>::type>
394 static IPSDK_FORCEINLINE
// Narrow 32-bit elements to 16 bits with signed saturation. The pack works
// per 128-bit lane, so the result alternates 64-bit groups from inl and inh:
// {l0,h0,l1,h1,l2,h2,l3,h3}.
399 out = _mm512_packs_epi32(inl, inh);
// Gather qwords {0,2,4,6,1,3,5,7} (lowest first) so all inl-derived words
// occupy the low half of `out` and all inh-derived words the high half.
400 out = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), out);
406 template <
typename TIn>
408 typename boost::enable_if_c<sizeof(TIn)==4 && boost::is_integral<TIn>::value>::type>
410 static IPSDK_FORCEINLINE
// Narrow 32-bit elements to 16 bits with unsigned saturation. The pack works
// per 128-bit lane, so the result alternates 64-bit groups from inl and inh:
// {l0,h0,l1,h1,l2,h2,l3,h3}.
415 out = _mm512_packus_epi32(inl, inh);
// Gather qwords {0,2,4,6,1,3,5,7} (lowest first) so all inl-derived words
// occupy the low half of `out` and all inh-derived words the high half.
416 out = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), out);
422 template <
typename TOut>
424 typename boost::enable_if_c<sizeof(TOut)==2>::type>
426 static IPSDK_FORCEINLINE
437 inlInt32, inhInt32, out);
446 static IPSDK_FORCEINLINE
// Shuffle immediate 0x44 takes 128-bit lanes 0 and 1 of inl_ps for the low
// half of `out` and lanes 0 and 1 of inh_ps for the high half.
// NOTE(review): inl_ps / inh_ps are built on lines not present in this
// listing; presumably each carries its converted floats in its low 256
// bits - confirm.
455 out = _mm512_shuffle_f32x4(inl_ps, inh_ps, 0x44);
466 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX512_CASTREG_H__
int8_t ipInt8
Base types definition.
Definition: BaseTypes.h:48
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Definition: AllBitsToOneReg.h:33
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
int32_t ipInt32
Base types definition.
Definition: BaseTypes.h:52
(including foundation and byte and word instructions)
Definition: InstructionSetTypes.h:51
int16_t ipInt16
Base types definition.
Definition: BaseTypes.h:50
uint8_t ipUInt8
Base types definition.
Definition: BaseTypes.h:49
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Definition: Avx512MaskTypes.h:36
Definition of import/export macro for library.
Definition: AllBitsToZeroReg.h:31
Definition: IsLessRegDecl.h:30
uint16_t ipUInt16
Base types definition.
Definition: BaseTypes.h:51
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
Definition: MaskBlendReg.h:30
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53
structure used to retrieve AVX512 type associated to a base type
Definition: Avx512Types.h:36