IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
MulReg.h
Go to the documentation of this file.
1 // MulReg.h:
3 // -------------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX2_MULREG_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX2_MULREG_H__
17 
22 
23 namespace ipsdk {
24 namespace simd {
25 namespace detail {
26 
29 
// Byte-wise select between two 256-bit integer registers, returned by value.
// Forwards to _mm256_blendv_epi8 (b, a, s) -- note the swapped operand order:
// per that intrinsic's definition, a result byte is taken from `a` where the
// most significant bit of the matching byte of `s` is set, and from `b`
// otherwise.
//   s : per-byte selector
//   a : source used where the selector byte's top bit is set
//   b : source used where the selector byte's top bit is clear
30 static IPSDK_FORCEINLINE
31 __m256i selectb (const __m256i& s, const __m256i& a, const __m256i& b) {
32  return _mm256_blendv_epi8 (b, a, s);
33 }
34 
// Byte-wise select between two 256-bit integer registers, written into `res`.
// Computes _mm256_blendv_epi8 (b, a, s): per that intrinsic's definition, a
// result byte is taken from `a` where the most significant bit of the matching
// byte of `s` is set, and from `b` otherwise.
//   s   : per-byte selector
//   a   : source used where the selector byte's top bit is set
//   b   : source used where the selector byte's top bit is clear
//   res : receives the blended register (overwritten)
35 static IPSDK_FORCEINLINE
36 void selectb (const __m256i& s, const __m256i& a, const __m256i& b, __m256i& res) {
37  res = _mm256_blendv_epi8 (b, a, s);
38 }
39 
// Per-byte multiply of two 256-bit registers keeping the low 8 bits of each
// product, returned by value. The code builds it from two 16-bit multiplies
// (_mm256_mullo_epi16): one for the even bytes, one for the odd bytes, then
// merges the two partial results byte-wise with selectb.
//   a, b : registers treated as packed 8-bit elements
//   returns the register of packed 8-bit products
40 static IPSDK_FORCEINLINE
41 __m256i
42 _custom_mm256_mullo_epi8(__m256i a, __m256i b)
43 {
44  __m256i aodd = _mm256_srli_epi16(a,8); // odd bytes of a, shifted down into the low byte of each 16-bit lane
45  __m256i bodd = _mm256_srli_epi16(b,8); // odd bytes of b, shifted down into the low byte of each 16-bit lane
46  __m256i muleven = _mm256_mullo_epi16(a,b); // low byte of each lane = product of the even bytes (upper input bytes cannot affect it); high byte is discarded below
47  __m256i mulodd = _mm256_mullo_epi16(aodd,bodd); // low byte of each lane = product of the odd bytes
48  mulodd = _mm256_slli_epi16(mulodd,8); // move the odd products back up into the odd byte positions
49  __m256i mask = _mm256_set1_epi32(0x00FF00FF); // 0xFF in every even byte, 0x00 in every odd byte
50  __m256i product = selectb(mask,muleven,mulodd); // even bytes from muleven, odd bytes from mulodd
51  return product;
52 }
53 
// Per-byte multiply of two 256-bit registers keeping the low 8 bits of each
// product, written into `res`. The code builds it from two 16-bit multiplies
// (_mm256_mullo_epi16): one for the even bytes, one for the odd bytes, then
// merges the two partial results byte-wise with selectb.
//   a, b : registers treated as packed 8-bit elements
//   res  : receives the register of packed 8-bit products (overwritten)
54 static IPSDK_FORCEINLINE
55 void
56 _custom_mm256_mullo_epi8(__m256i a, __m256i b, __m256i& res)
57 {
58  __m256i aodd = _mm256_srli_epi16(a,8); // odd bytes of a, shifted down into the low byte of each 16-bit lane
59  __m256i bodd = _mm256_srli_epi16(b,8); // odd bytes of b, shifted down into the low byte of each 16-bit lane
60  __m256i muleven = _mm256_mullo_epi16(a,b); // low byte of each lane = product of the even bytes (upper input bytes cannot affect it); high byte is discarded below
61  __m256i mulodd = _mm256_mullo_epi16(aodd,bodd); // low byte of each lane = product of the odd bytes
62  mulodd = _mm256_slli_epi16(mulodd,8); // move the odd products back up into the odd byte positions
63  __m256i mask = _mm256_set1_epi32(0x00FF00FF); // 0xFF in every even byte, 0x00 in every odd byte
64  selectb(mask,muleven,mulodd, res); // even bytes from muleven, odd bytes from mulodd, stored in res
65 }
66 
69 
// Partial specialization enabled (via boost::enable_if_c) for integral T with
// sizeof(T) == 1. Both act() overloads delegate to _custom_mm256_mullo_epi8.
// NOTE(review): listing line 73 is absent from this extract -- presumably the
// `struct ...<` head of this specialization; restore it from the original
// header, the block is not compilable as shown.
72 template <typename T>
74  typename boost::enable_if_c<boost::is_integral<T>::value
75  && sizeof(T)==1>::type>
76 {
// Returns the result of _custom_mm256_mullo_epi8(in1, in2) by value.
77  static IPSDK_FORCEINLINE
78  typename AvxType<T>::Type
79  act(const typename AvxType<T>::Type& in1,
80  const typename AvxType<T>::Type& in2)
81  {
82  return _custom_mm256_mullo_epi8(in1, in2);
83  }
84 
// Same computation, with the result stored into `out` instead of returned.
85  static IPSDK_FORCEINLINE
86  void
87  act(const typename AvxType<T>::Type& in1,
88  const typename AvxType<T>::Type& in2,
89  typename AvxType<T>::Type& out)
90  {
91  _custom_mm256_mullo_epi8(in1, in2, out);
92  }
93 };
94 
// Partial specialization enabled (via boost::enable_if_c) for integral T with
// sizeof(T) == 2. Both act() overloads map directly onto _mm256_mullo_epi16.
// NOTE(review): listing line 98 is absent from this extract -- presumably the
// `struct ...<` head of this specialization; restore it from the original
// header, the block is not compilable as shown.
97 template <typename T>
99  typename boost::enable_if_c<boost::is_integral<T>::value
100  && sizeof(T)==2>::type>
101 {
// Returns _mm256_mullo_epi16(in1, in2) by value.
102  static IPSDK_FORCEINLINE
103  typename AvxType<T>::Type
104  act(const typename AvxType<T>::Type& in1,
105  const typename AvxType<T>::Type& in2)
106  {
107  return _mm256_mullo_epi16(in1, in2);
108  }
109 
// Same computation, with the result stored into `out` instead of returned.
110  static IPSDK_FORCEINLINE
111  void
112  act(const typename AvxType<T>::Type& in1,
113  const typename AvxType<T>::Type& in2,
114  typename AvxType<T>::Type& out)
115  {
116  out = _mm256_mullo_epi16(in1, in2);
117  }
118 };
119 
// Partial specialization enabled (via boost::enable_if_c) for integral T with
// sizeof(T) == 4. Both act() overloads map directly onto _mm256_mullo_epi32.
// NOTE(review): listing line 123 is absent from this extract -- presumably the
// `struct ...<` head of this specialization; restore it from the original
// header, the block is not compilable as shown.
122 template <typename T>
124  typename boost::enable_if_c<boost::is_integral<T>::value
125  && sizeof(T)==4>::type>
126 {
// Returns _mm256_mullo_epi32(in1, in2) by value.
127  static IPSDK_FORCEINLINE
128  typename AvxType<T>::Type
129  act(const typename AvxType<T>::Type& in1,
130  const typename AvxType<T>::Type& in2)
131  {
132  return _mm256_mullo_epi32(in1, in2);
133  }
134 
// Same computation, with the result stored into `out` instead of returned.
135  static IPSDK_FORCEINLINE
136  void
137  act(const typename AvxType<T>::Type& in1,
138  const typename AvxType<T>::Type& in2,
139  typename AvxType<T>::Type& out)
140  {
141  out = _mm256_mullo_epi32(in1, in2);
142  }
143 };
144 
// Full specialization (template <>) whose act() overloads take
// AvxType<ipReal32>::Type operands.
// NOTE(review): this extract has lost listing lines 148, 151, 155, 162 and
// 164 -- i.e. the specialization head, the first overload's return type, the
// last parameter of the second overload and both function bodies. Presumably
// the bodies perform a packed single-precision multiply; confirm against the
// original header. Nothing below is compilable as shown.
147 template <>
149 {
// Two-operand overload; return type and body are missing from this extract.
150  static IPSDK_FORCEINLINE
152  act(const AvxType<ipReal32>::Type& in1,
153  const AvxType<ipReal32>::Type& in2)
154  {
156  }
157 
// void overload; its trailing parameter and body are missing from this extract.
158  static IPSDK_FORCEINLINE
159  void
160  act(const AvxType<ipReal32>::Type& in1,
161  const AvxType<ipReal32>::Type& in2,
163  {
165  }
166 };
167 
// Full specialization (template <>) whose act() overloads take
// AvxType<ipReal64>::Type operands.
// NOTE(review): this extract has lost listing lines 171, 174, 178, 185 and
// 187 -- i.e. the specialization head, the first overload's return type, the
// last parameter of the second overload and both function bodies. Presumably
// the bodies perform a packed double-precision multiply; confirm against the
// original header. Nothing below is compilable as shown.
170 template <>
172 {
// Two-operand overload; return type and body are missing from this extract.
173  static IPSDK_FORCEINLINE
175  act(const AvxType<ipReal64>::Type& in1,
176  const AvxType<ipReal64>::Type& in2)
177  {
179  }
180 
// void overload; its trailing parameter and body are missing from this extract.
181  static IPSDK_FORCEINLINE
182  void
183  act(const AvxType<ipReal64>::Type& in1,
184  const AvxType<ipReal64>::Type& in2,
186  {
188  }
189 };
190 
193 
194 } // end of namespace detail
195 } // end of namespace simd
196 } // end of namespace ipsdk
197 
198 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX2_MULREG_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
Predefined types for Avx instruction set management.
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Advanced Vector Extensions 2.
Definition: InstructionSetTypes.h:48
Definition of import/export macro for library.
Definition: MulReg.h:39
structure used to retrieve AVX type associated to a base type
Definition: AvxTypes.h:33
float ipReal32
Base types definition.
Definition: BaseTypes.h:56