IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
IsNotEqualRegImpl.h
Go to the documentation of this file.
1 // IsNotEqualRegImpl.h:
3 // -------------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_COMPARISON_DETAIL_AVX512_ISNOTEQUALREGIMPL_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_COMPARISON_DETAIL_AVX512_ISNOTEQUALREGIMPL_H__
17 
19 
20 namespace ipsdk {
21 namespace simd {
22 namespace detail {
23 
26 
/// Generates the AVX-512 specialization of IsNotEqualReg for the signed
/// integer type ipInt<nbBitsInInt>.
///
/// \param nbBitsInInt  bit width of one integer lane; pasted into the ipInt*
///                     type name and the _mm512_cmp_epi*_mask intrinsic name
/// \param nbBitsInMask bit width of the comparison mask; pasted into __mmask*
///
/// Two act() overloads are produced: one returns the mask of the lane-wise
/// "not equal" comparison (_MM_CMPINT_NE predicate), the other writes that
/// same mask into its `out` argument.
///
/// Only block comments are used inside the macro body: a `//` comment would
/// swallow the line continuation that follows it.
#define DEFINE_ISNOTEQUALREG_INT(nbBitsInInt, nbBitsInMask) \
template <> \
struct IsNotEqualReg<eInstructionSet::eIS_Avx512, ipInt##nbBitsInInt> \
{ \
    /* Value-returning form: bit i of the result is set when lane i differs. */ \
    static IPSDK_FORCEINLINE __mmask##nbBitsInMask \
    act(const Avx512Type<ipInt##nbBitsInInt>::Type& in1, \
        const Avx512Type<ipInt##nbBitsInInt>::Type& in2) \
    { \
        return _mm512_cmp_epi##nbBitsInInt##_mask(in1, in2, _MM_CMPINT_NE); \
    } \
    /* Output-parameter form: stores the same mask into out. */ \
    static IPSDK_FORCEINLINE void \
    act(const Avx512Type<ipInt##nbBitsInInt>::Type& in1, \
        const Avx512Type<ipInt##nbBitsInInt>::Type& in2, \
        __mmask##nbBitsInMask& out) \
    { \
        out = act(in1, in2); \
    } \
};
47 
/// Generates the AVX-512 specialization of IsNotEqualReg for the unsigned
/// integer type ipUInt<nbBitsInInt>.
///
/// \param nbBitsInInt  bit width of one integer lane; pasted into the ipUInt*
///                     type name and the _mm512_cmp_epu*_mask intrinsic name
/// \param nbBitsInMask bit width of the comparison mask; pasted into __mmask*
///
/// Two act() overloads are produced: one returns the mask of the lane-wise
/// "not equal" comparison (_MM_CMPINT_NE predicate), the other writes that
/// same mask into its `out` argument.
///
/// Only block comments are used inside the macro body: a `//` comment would
/// swallow the line continuation that follows it.
#define DEFINE_ISNOTEQUALREG_UINT(nbBitsInInt, nbBitsInMask) \
template <> \
struct IsNotEqualReg<eInstructionSet::eIS_Avx512, ipUInt##nbBitsInInt> \
{ \
    /* Value-returning form: bit i of the result is set when lane i differs. */ \
    static IPSDK_FORCEINLINE __mmask##nbBitsInMask \
    act(const Avx512Type<ipUInt##nbBitsInInt>::Type& in1, \
        const Avx512Type<ipUInt##nbBitsInInt>::Type& in2) \
    { \
        return _mm512_cmp_epu##nbBitsInInt##_mask(in1, in2, _MM_CMPINT_NE); \
    } \
    /* Output-parameter form: stores the same mask into out. */ \
    static IPSDK_FORCEINLINE void \
    act(const Avx512Type<ipUInt##nbBitsInInt>::Type& in1, \
        const Avx512Type<ipUInt##nbBitsInInt>::Type& in2, \
        __mmask##nbBitsInMask& out) \
    { \
        out = act(in1, in2); \
    } \
};
68 
69 DEFINE_ISNOTEQUALREG_INT(8, 64)
70 DEFINE_ISNOTEQUALREG_UINT(8, 64)
71 DEFINE_ISNOTEQUALREG_INT(16, 32)
72 DEFINE_ISNOTEQUALREG_UINT(16, 32)
73 DEFINE_ISNOTEQUALREG_INT(32, 16)
74 DEFINE_ISNOTEQUALREG_UINT(32, 16)
75 DEFINE_ISNOTEQUALREG_INT(64, 8)
76 DEFINE_ISNOTEQUALREG_UINT(64, 8)
77 
78 template <>
80 {
81  static IPSDK_FORCEINLINE
82  __mmask16
83  act(const Avx512Type<ipReal32>::Type& in1,
84  const Avx512Type<ipReal32>::Type& in2)
85  {
86  return _mm512_cmp_ps_mask(in1, in2, _MM_CMPINT_NE);
87  }
88 
89  static IPSDK_FORCEINLINE
90  void
91  act(const Avx512Type<ipReal32>::Type& in1,
92  const Avx512Type<ipReal32>::Type& in2,
93  __mmask16& out)
94  {
95  out = _mm512_cmp_ps_mask(in1, in2, _MM_CMPINT_NE);
96  }
97 };
98 
99 template <>
101 {
102  static IPSDK_FORCEINLINE
103  __mmask8
104  act(const Avx512Type<ipReal64>::Type& in1,
105  const Avx512Type<ipReal64>::Type& in2)
106  {
107  return _mm512_cmp_pd_mask(in1, in2, _MM_CMPINT_NE);
108  }
109 
110  static IPSDK_FORCEINLINE
111  void
112  act(const Avx512Type<ipReal64>::Type& in1,
113  const Avx512Type<ipReal64>::Type& in2,
114  __mmask8& out)
115  {
116  out = _mm512_cmp_pd_mask(in1, in2, _MM_CMPINT_NE);
117  }
118 };
119 
122 
123 } // end of namespace detail
124 } // end of namespace simd
125 } // end of namespace ipsdk
126 
127 #endif // __IPSDKUTIL_INSTRUCTIONSET_COMPARISON_DETAIL_AVX512_ISNOTEQUALREGIMPL_H__
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
eIS_Avx512: AVX-512 instruction set (including foundation and byte and word instructions)
Definition: InstructionSetTypes.h:51
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Definition: IsNotEqualRegDecl.h:30
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
Structure used to retrieve the AVX-512 type associated with a base type.
Definition: Avx512Types.h:36