15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_SSE2_MULREG_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_SSE2_MULREG_H__ 29 static IPSDK_FORCEINLINE
// Value-returning helper that combines two masked 16-bit-lane computations on a and b
// with a bitwise OR; presumably an emulation of a per-byte multiply (SSE2 has no 8-bit
// multiply intrinsic) -- TODO confirm against the full header.
// NOTE(review): several lines of this definition are not visible in this excerpt; the
// comments below describe only the operations that are visible.
31 _custom_mm_mullo_epi8(__m128i a, __m128i b)
// 0x00FF in every 16-bit lane: selects the low byte of each lane.
43 __m128i mask = _mm_set1_epi16(0xFF);
// First OR operand: the low byte of each 16-bit product of a and b.
44 return _mm_or_si128 ( _mm_and_si128(mask, _mm_mullo_epi16(a, b))
// Second OR operand: a masked value derived from the shifted inputs below
// (the operation applied around it is not visible here).
46 ( _mm_and_si128 ( mask
// Logical right shift by 8 moves the high byte of each 16-bit lane into the low byte position.
48 ( _mm_srli_epi16(a, 8)
49 , _mm_srli_epi16(b, 8)
// Out-parameter overload: performs the same visible sequence of operations as the
// value-returning _custom_mm_mullo_epi8 and assigns the result to `out`.
// NOTE(review): several lines of this definition are not visible in this excerpt; the
// comments below describe only the operations that are visible.
57 static IPSDK_FORCEINLINE
59 _custom_mm_mullo_epi8(__m128i a, __m128i b, __m128i& out)
// 0x00FF in every 16-bit lane: selects the low byte of each lane.
61 __m128i mask = _mm_set1_epi16(0xFF);
// First OR operand: the low byte of each 16-bit product of a and b.
62 out = _mm_or_si128 ( _mm_and_si128(mask, _mm_mullo_epi16(a, b))
// Second OR operand: a masked value derived from the shifted inputs below
// (the operation applied around it is not visible here).
64 ( _mm_and_si128 ( mask
// Logical right shift by 8 moves the high byte of each 16-bit lane into the low byte position.
66 ( _mm_srli_epi16(a, 8)
67 , _mm_srli_epi16(b, 8)
// Two-argument helper for 32-bit lanes built around _mm_mul_epu32; presumably an
// emulation of a low-32-bit per-lane multiply -- TODO confirm against the full header.
// NOTE(review): most lines of this definition are not visible in this excerpt; the
// comments below describe only the operations that are visible.
77 _custom_mm_mullo_epi32(__m128i a, __m128i b)
// Mask with all bits set in 32-bit elements 0 and 2 and cleared in elements 1 and 3.
96 _mm_setr_epi32(0xffffffff,0,0xffffffff,0)
// Both operands are shifted right by 4 bytes, which moves 32-bit elements 1 and 3 into
// positions 0 and 2 -- the positions that _mm_mul_epu32 reads as its inputs.
100 _mm_mul_epu32( _mm_srli_si128(a, 4)
101 , _mm_srli_si128(b, 4)
// Same element-0/element-2 mask as above.
103 , _mm_setr_epi32(0xffffffff,0,0xffffffff,0)
// Three-argument overload taking an `out` reference; presumably stores into `out` the
// same result the two-argument overload produces -- the assignment itself is not
// visible in this excerpt, TODO confirm.
// NOTE(review): most lines of this definition are not visible in this excerpt; the
// comments below describe only the operations that are visible.
112 _custom_mm_mullo_epi32(__m128i a, __m128i b, __m128i& out)
// Mask with all bits set in 32-bit elements 0 and 2 and cleared in elements 1 and 3.
117 _mm_setr_epi32(0xffffffff,0,0xffffffff,0)
// Both operands are shifted right by 4 bytes, which moves 32-bit elements 1 and 3 into
// positions 0 and 2 -- the positions that _mm_mul_epu32 reads as its inputs.
121 _mm_mul_epu32( _mm_srli_si128(a, 4)
122 , _mm_srli_si128(b, 4)
// Same element-0/element-2 mask as above.
124 , _mm_setr_epi32(0xffffffff,0,0xffffffff,0)
133 template <
typename T>
135 typename boost::enable_if_c<boost::is_integral<T>::value
136 && sizeof(T) == 1>::type>
138 static IPSDK_FORCEINLINE
143 return _custom_mm_mullo_epi8(in1, in2);
146 static IPSDK_FORCEINLINE
152 _custom_mm_mullo_epi8(in1, in2, out);
158 template <
typename T>
160 typename boost::enable_if_c<boost::is_integral<T>::value
161 && sizeof(T) == 2>::type>
163 static IPSDK_FORCEINLINE
168 return _mm_mullo_epi16(in1, in2);
171 static IPSDK_FORCEINLINE
177 out = _mm_mullo_epi16(in1, in2);
183 template <
typename T>
185 typename boost::enable_if_c<boost::is_integral<T>::value
186 && sizeof(T) == 4>::type>
188 static IPSDK_FORCEINLINE
193 return _custom_mm_mullo_epi32(in1, in2);
196 static IPSDK_FORCEINLINE
202 _custom_mm_mullo_epi32(in1, in2, out);
211 static IPSDK_FORCEINLINE
216 return _mm_mul_ps(in1, in2);
219 static IPSDK_FORCEINLINE
225 out = _mm_mul_ps(in1, in2);
234 static IPSDK_FORCEINLINE
239 return _mm_mul_pd(in1, in2);
242 static IPSDK_FORCEINLINE
248 out = _mm_mul_pd(in1, in2);
259 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_SSE2_MULREG_H__ Defines the IPSDK_FORCEINLINE macro.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
eInstructionSet
Enumeration describing the processor instruction set.
Definition: InstructionSetTypes.h:31
Predefined types for Sse2 instruction set management.
Definition of the import/export macro for the library.
Streaming SIMD Extensions 2.
Definition: InstructionSetTypes.h:36
Structure used to retrieve the SSE2 type associated with a base type.
Definition: Sse2Types.h:32
float ipReal32
Base types definition.
Definition: BaseTypes.h:56