15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX512_MULREG_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX512_MULREG_H__ 30 static IPSDK_FORCEINLINE
31 __m512i selectb (
const __mmask64& s,
const __m512i& a,
const __m512i& b) {
32 return _mm512_mask_blend_epi8(s, b, a);
35 static IPSDK_FORCEINLINE
36 void selectb (
const __mmask64& s,
const __m512i& a,
const __m512i& b, __m512i& res) {
37 res = _mm512_mask_blend_epi8(s, b, a);
40 static IPSDK_FORCEINLINE
42 _custom_mm512_mullo_epi8(__m512i a, __m512i b)
44 __m512i aodd = _mm512_srli_epi16(a,8);
45 __m512i bodd = _mm512_srli_epi16(b,8);
46 __m512i muleven = _mm512_mullo_epi16(a,b);
47 __m512i mulodd = _mm512_mullo_epi16(aodd,bodd);
48 mulodd = _mm512_slli_epi16(mulodd,8);
49 __mmask64 mask = 0x5555555555555555;
50 __m512i product = selectb(mask, muleven, mulodd);
54 static IPSDK_FORCEINLINE
56 _custom_mm512_mullo_epi8(__m512i a, __m512i b, __m512i& res)
58 __m512i aodd = _mm512_srli_epi16(a,8);
59 __m512i bodd = _mm512_srli_epi16(b,8);
60 __m512i muleven = _mm512_mullo_epi16(a,b);
61 __m512i mulodd = _mm512_mullo_epi16(aodd,bodd);
62 mulodd = _mm512_slli_epi16(mulodd,8);
63 __mmask64 mask = 0x5555555555555555;
64 selectb(mask,muleven,mulodd, res);
// Case selected when T is integral and sizeof(T) == 1 (boost::enable_if_c).
// NOTE(review): the enclosing declaration and both function signatures are
// not visible in this excerpt; only the bodies below are.
74 typename boost::enable_if_c<boost::is_integral<T>::value
75 && sizeof(T)==1>::type>
77 static IPSDK_FORCEINLINE
// Value-returning form: forwards to the emulated 8-bit multiply.
82 return _custom_mm512_mullo_epi8(in1, in2);
85 static IPSDK_FORCEINLINE
// Out-parameter form: the emulated 8-bit multiply stores its result in `out`.
91 _custom_mm512_mullo_epi8(in1, in2, out);
// Case selected when T is integral and sizeof(T) == 2 (boost::enable_if_c).
// NOTE(review): the enclosing declaration and both function signatures are
// not visible in this excerpt; only the bodies below are.
99 typename boost::enable_if_c<boost::is_integral<T>::value
100 && sizeof(T)==2>::type>
102 static IPSDK_FORCEINLINE
// Value-returning form: packed 16-bit multiply keeping the low half of each product.
107 return _mm512_mullo_epi16(in1, in2);
110 static IPSDK_FORCEINLINE
// Out-parameter form: same multiply, result assigned to `out`.
116 out = _mm512_mullo_epi16(in1, in2);
// Case selected when T is integral and sizeof(T) == 4 (boost::enable_if_c).
// NOTE(review): the specialized type's name and both function signatures are
// not visible in this excerpt; only the bodies below are.
122 template <
typename T>
124 typename boost::enable_if_c<boost::is_integral<T>::value
125 && sizeof(T)==4>::type>
127 static IPSDK_FORCEINLINE
// Value-returning form: packed 32-bit multiply keeping the low half of each product.
132 return _mm512_mullo_epi32(in1, in2);
135 static IPSDK_FORCEINLINE
// Out-parameter form: same multiply, result assigned to `out`.
141 out = _mm512_mullo_epi32(in1, in2);
// Packed single-precision floating-point multiply.
// NOTE(review): the enclosing declaration is not visible here; presumably this
// is the 32-bit float (ipReal32) case - confirm against the full header.
150 static IPSDK_FORCEINLINE
// Value-returning form.
155 return _mm512_mul_ps(in1, in2);
158 static IPSDK_FORCEINLINE
// Out-parameter form: result assigned to `out`.
164 out = _mm512_mul_ps(in1, in2);
// Packed double-precision floating-point multiply.
// NOTE(review): the enclosing declaration is not visible here; presumably this
// is the 64-bit float (ipReal64) case - confirm against the full header.
173 static IPSDK_FORCEINLINE
// Value-returning form.
178 return _mm512_mul_pd(in1, in2);
181 static IPSDK_FORCEINLINE
// Out-parameter form: result assigned to `out`.
187 out = _mm512_mul_pd(in1, in2);
#endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX512_MULREG_H__
Defines the IPSDK_FORCEINLINE macro.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
(including foundation, byte, and word instructions)
Definition: InstructionSetTypes.h:51
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Predefined types for Avx512 instruction set management.
Definition of import/export macro for library.
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
Structure used to retrieve the AVX512 type associated with a base type.
Definition: Avx512Types.h:36