IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
MulReg.h
Go to the documentation of this file.
1 // MulReg.h:
3 // -------------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX512_MULREG_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX512_MULREG_H__
17 
22 
23 namespace ipsdk {
24 namespace simd {
25 namespace detail {
26 
29 
30 static IPSDK_FORCEINLINE
31 __m512i selectb (const __mmask64& s, const __m512i& a, const __m512i& b) {
32  return _mm512_mask_blend_epi8(s, b, a);
33 }
34 
35 static IPSDK_FORCEINLINE
36 void selectb (const __mmask64& s, const __m512i& a, const __m512i& b, __m512i& res) {
37  res = _mm512_mask_blend_epi8(s, b, a);
38 }
39 
40 static IPSDK_FORCEINLINE
41 __m512i
42 _custom_mm512_mullo_epi8(__m512i a, __m512i b)
43 {
44  __m512i aodd = _mm512_srli_epi16(a,8); // odd numbered elements of a
45  __m512i bodd = _mm512_srli_epi16(b,8); // odd numbered elements of b
46  __m512i muleven = _mm512_mullo_epi16(a,b); // product of even numbered elements
47  __m512i mulodd = _mm512_mullo_epi16(aodd,bodd); // product of odd numbered elements
48  mulodd = _mm512_slli_epi16(mulodd,8); // put odd numbered elements back in place
49  __mmask64 mask = 0x5555555555555555; // mask for even positions
50  __m512i product = selectb(mask, muleven, mulodd); // interleave even and odd
51  return product;
52 }
53 
54 static IPSDK_FORCEINLINE
55 void
56 _custom_mm512_mullo_epi8(__m512i a, __m512i b, __m512i& res)
57 {
58  __m512i aodd = _mm512_srli_epi16(a,8); // odd numbered elements of a
59  __m512i bodd = _mm512_srli_epi16(b,8); // odd numbered elements of b
60  __m512i muleven = _mm512_mullo_epi16(a,b); // product of even numbered elements
61  __m512i mulodd = _mm512_mullo_epi16(aodd,bodd); // product of odd numbered elements
62  mulodd = _mm512_slli_epi16(mulodd,8); // put odd numbered elements back in place
63  __mmask64 mask = 0x5555555555555555; // mask for even positions
64  selectb(mask,muleven,mulodd, res); // interleave even and odd
65 }
66 
69 
72 template <typename T>
74  typename boost::enable_if_c<boost::is_integral<T>::value
75  && sizeof(T)==1>::type>
76 {
77  static IPSDK_FORCEINLINE
78  typename Avx512Type<T>::Type
79  act(const typename Avx512Type<T>::Type& in1,
80  const typename Avx512Type<T>::Type& in2)
81  {
82  return _custom_mm512_mullo_epi8(in1, in2);
83  }
84 
85  static IPSDK_FORCEINLINE
86  void
87  act(const typename Avx512Type<T>::Type& in1,
88  const typename Avx512Type<T>::Type& in2,
89  typename Avx512Type<T>::Type& out)
90  {
91  _custom_mm512_mullo_epi8(in1, in2, out);
92  }
93 };
94 
97 template <typename T>
99  typename boost::enable_if_c<boost::is_integral<T>::value
100  && sizeof(T)==2>::type>
101 {
102  static IPSDK_FORCEINLINE
103  typename Avx512Type<T>::Type
104  act(const typename Avx512Type<T>::Type& in1,
105  const typename Avx512Type<T>::Type& in2)
106  {
107  return _mm512_mullo_epi16(in1, in2);
108  }
109 
110  static IPSDK_FORCEINLINE
111  void
112  act(const typename Avx512Type<T>::Type& in1,
113  const typename Avx512Type<T>::Type& in2,
114  typename Avx512Type<T>::Type& out)
115  {
116  out = _mm512_mullo_epi16(in1, in2);
117  }
118 };
119 
122 template <typename T>
124  typename boost::enable_if_c<boost::is_integral<T>::value
125  && sizeof(T)==4>::type>
126 {
127  static IPSDK_FORCEINLINE
128  typename Avx512Type<T>::Type
129  act(const typename Avx512Type<T>::Type& in1,
130  const typename Avx512Type<T>::Type& in2)
131  {
132  return _mm512_mullo_epi32(in1, in2);
133  }
134 
135  static IPSDK_FORCEINLINE
136  void
137  act(const typename Avx512Type<T>::Type& in1,
138  const typename Avx512Type<T>::Type& in2,
139  typename Avx512Type<T>::Type& out)
140  {
141  out = _mm512_mullo_epi32(in1, in2);
142  }
143 };
144 
147 template <>
149 {
150  static IPSDK_FORCEINLINE
152  act(const Avx512Type<ipReal32>::Type& in1,
153  const Avx512Type<ipReal32>::Type& in2)
154  {
155  return _mm512_mul_ps(in1, in2);
156  }
157 
158  static IPSDK_FORCEINLINE
159  void
160  act(const Avx512Type<ipReal32>::Type& in1,
161  const Avx512Type<ipReal32>::Type& in2,
163  {
164  out = _mm512_mul_ps(in1, in2);
165  }
166 };
167 
170 template <>
172 {
173  static IPSDK_FORCEINLINE
175  act(const Avx512Type<ipReal64>::Type& in1,
176  const Avx512Type<ipReal64>::Type& in2)
177  {
178  return _mm512_mul_pd(in1, in2);
179  }
180 
181  static IPSDK_FORCEINLINE
182  void
183  act(const Avx512Type<ipReal64>::Type& in1,
184  const Avx512Type<ipReal64>::Type& in2,
186  {
187  out = _mm512_mul_pd(in1, in2);
188  }
189 };
190 
193 
194 } // end of namespace detail
195 } // end of namespace simd
196 } // end of namespace ipsdk
197 
198 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX512_MULREG_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
(including fundation and byte and word instructions)
Definition: InstructionSetTypes.h:51
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Predefined types for Avx512 instruction set management.
Definition of import/export macro for library.
Definition: MulReg.h:39
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
structure used to retrieve AVX512 type associated to a base type
Definition: Avx512Types.h:36