15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX2_MULREG_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX2_MULREG_H__ 30 static IPSDK_FORCEINLINE
31 __m256i selectb (
const __m256i& s,
const __m256i& a,
const __m256i& b) {
32 return _mm256_blendv_epi8 (b, a, s);
35 static IPSDK_FORCEINLINE
36 void selectb (
const __m256i& s,
const __m256i& a,
const __m256i& b, __m256i& res) {
37 res = _mm256_blendv_epi8 (b, a, s);
40 static IPSDK_FORCEINLINE
42 _custom_mm256_mullo_epi8(__m256i a, __m256i b)
44 __m256i aodd = _mm256_srli_epi16(a,8);
45 __m256i bodd = _mm256_srli_epi16(b,8);
46 __m256i muleven = _mm256_mullo_epi16(a,b);
47 __m256i mulodd = _mm256_mullo_epi16(aodd,bodd);
48 mulodd = _mm256_slli_epi16(mulodd,8);
49 __m256i mask = _mm256_set1_epi32(0x00FF00FF);
50 __m256i product = selectb(mask,muleven,mulodd);
54 static IPSDK_FORCEINLINE
56 _custom_mm256_mullo_epi8(__m256i a, __m256i b, __m256i& res)
58 __m256i aodd = _mm256_srli_epi16(a,8);
59 __m256i bodd = _mm256_srli_epi16(b,8);
60 __m256i muleven = _mm256_mullo_epi16(a,b);
61 __m256i mulodd = _mm256_mullo_epi16(aodd,bodd);
62 mulodd = _mm256_slli_epi16(mulodd,8);
63 __m256i mask = _mm256_set1_epi32(0x00FF00FF);
64 selectb(mask,muleven,mulodd, res);
74 typename boost::enable_if_c<boost::is_integral<T>::value
75 && sizeof(T)==1>::type>
77 static IPSDK_FORCEINLINE
82 return _custom_mm256_mullo_epi8(in1, in2);
85 static IPSDK_FORCEINLINE
91 _custom_mm256_mullo_epi8(in1, in2, out);
99 typename boost::enable_if_c<boost::is_integral<T>::value
100 && sizeof(T)==2>::type>
102 static IPSDK_FORCEINLINE
107 return _mm256_mullo_epi16(in1, in2);
110 static IPSDK_FORCEINLINE
116 out = _mm256_mullo_epi16(in1, in2);
122 template <
typename T>
124 typename boost::enable_if_c<boost::is_integral<T>::value
125 && sizeof(T)==4>::type>
127 static IPSDK_FORCEINLINE
132 return _mm256_mullo_epi32(in1, in2);
135 static IPSDK_FORCEINLINE
141 out = _mm256_mullo_epi32(in1, in2);
150 static IPSDK_FORCEINLINE
158 static IPSDK_FORCEINLINE
173 static IPSDK_FORCEINLINE
181 static IPSDK_FORCEINLINE
198 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX2_MULREG_H__ Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
Predefined types for Avx instruction set management.
eInstructionSet
Enumeration describing the processor instruction set.
Definition: InstructionSetTypes.h:31
Advanced Vector Extensions 2.
Definition: InstructionSetTypes.h:48
Definition of import/export macro for library.
Structure used to retrieve the AVX type associated with a base type.
Definition: AvxTypes.h:33
float ipReal32
Base types definition.
Definition: BaseTypes.h:56