IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
IsNotEqualRegImpl.h
Go to the documentation of this file.
1 // IsNotEqualRegImpl.h:
3 // -------------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_COMPARISON_DETAIL_AVX_ISNOTEQUALREGIMPL_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_COMPARISON_DETAIL_AVX_ISNOTEQUALREGIMPL_H__
17 
20 
21 namespace ipsdk {
22 namespace simd {
23 namespace detail {
24 
27 
28 template <typename T>
29 IPSDK_FORCEINLINE
30 typename RegMaskType<eInstructionSet::eIS_Avx, T>::Type
31 IsNotEqualReg<eInstructionSet::eIS_Avx, T,
32  typename boost::enable_if<
33  typename boost::mpl::and_<
34  typename boost::is_integral<T>::type,
35  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
36  >::type
37  >::type
38 >::act(
39  const typename AvxType<T>::Type& in1,
40  const typename AvxType<T>::Type& in2)
41 {
42  typename RegMaskType<eInstructionSet::eIS_Avx, T>::Type out;
43  act(in1, in2, out);
44  return out;
45 }
46 
47 template <typename T>
48 IPSDK_FORCEINLINE
49 void
50 IsNotEqualReg<eInstructionSet::eIS_Avx, T,
51  typename boost::enable_if<
52  typename boost::mpl::and_<
53  typename boost::is_integral<T>::type,
54  typename boost::mpl::equal_to<boost::mpl::int_<sizeof(T)>, boost::mpl::int_<4> >::type
55  >::type
56  >::type
57 >::act(
58  const typename AvxType<T>::Type& in1,
59  const typename AvxType<T>::Type& in2,
60  typename RegMaskType<eInstructionSet::eIS_Avx, T>::Type& out)
61 {
62  __m128i in11, in12;
63  __m128i in21, in22;
64  COPY_IMM_TO_XMM(in1, in11, in12);
65  COPY_IMM_TO_XMM(in2, in21, in22);
66  typename RegMaskType<eInstructionSet::eIS_Sse2, T>::Type out1 =
67  IsNotEqualReg<eInstructionSet::eIS_Sse2, T>::act(in11, in21);
68  typename RegMaskType<eInstructionSet::eIS_Sse2, T>::Type out2 =
69  IsNotEqualReg<eInstructionSet::eIS_Sse2, T>::act(in12, in22);
70  COPY_XMM_TO_IMM(out1, out2, out);
71 }
72 
73 IPSDK_FORCEINLINE
74 RegMaskType<eInstructionSet::eIS_Avx, ipReal32>::Type
75 IsNotEqualReg<eInstructionSet::eIS_Avx, ipReal32>::act(
76  const AvxType<ipReal32>::Type& in1,
77  const AvxType<ipReal32>::Type& in2)
78 {
79  return _mm256_cmp_ps(in1, in2, _CMP_NEQ_UQ);
80 }
81 
82 IPSDK_FORCEINLINE
83 void
84 IsNotEqualReg<eInstructionSet::eIS_Avx, ipReal32>::act(
85  const AvxType<ipReal32>::Type& in1,
86  const AvxType<ipReal32>::Type& in2,
87  RegMaskType<eInstructionSet::eIS_Avx, ipReal32>::Type& out)
88 {
89  out = _mm256_cmp_ps(in1, in2, _CMP_NEQ_UQ);
90 }
91 
92 IPSDK_FORCEINLINE
93 RegMaskType<eInstructionSet::eIS_Avx, ipReal64>::Type
94 IsNotEqualReg<eInstructionSet::eIS_Avx, ipReal64>::act(
95  const AvxType<ipReal64>::Type& in1,
96  const AvxType<ipReal64>::Type& in2)
97 {
98  return _mm256_cmp_pd(in1, in2, _CMP_NEQ_UQ);
99 }
100 
101 IPSDK_FORCEINLINE
102 void
103 IsNotEqualReg<eInstructionSet::eIS_Avx, ipReal64>::act(
104  const AvxType<ipReal64>::Type& in1,
105  const AvxType<ipReal64>::Type& in2,
106  RegMaskType<eInstructionSet::eIS_Avx, ipReal64>::Type& out)
107 {
108  out = _mm256_cmp_pd(in1, in2, _CMP_NEQ_UQ);
109 }
110 
113 
114 } // end of namespace detail
115 } // end of namespace simd
116 } // end of namespace ipsdk
117 
118 #endif // __IPSDKUTIL_INSTRUCTIONSET_COMPARISON_DETAIL_AVX_ISNOTEQUALREGIMPL_H__
Functions to convert from 128-bit (resp. 256-bit) registers to 256-bit (resp. 128-bit) registers.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Advanced Vector Extensions.
Definition: InstructionSetTypes.h:44