IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
SumReg.h
Go to the documentation of this file.
1 // SumReg.h:
3 // -------------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_REDUCTION_DETAIL_AVX512_SUMREG_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_REDUCTION_DETAIL_AVX512_SUMREG_H__
17 
22 
23 namespace ipsdk {
24 namespace simd {
25 namespace detail {
26 namespace reduction {
27 
30 /*
33 template <typename T>
34 struct SumReg<eInstructionSet::eIS_Avx512, T,
35  typename boost::enable_if_c<sizeof(T) == 1>::type>
36 {
37  static IPSDK_FORCEINLINE
38  T
39  act(const typename Avx512Type<T>::Type& in)
40  {
41  T unloaded[64];
42  UnloadReg<eInstructionSet::eIS_Avx512, T>::act(in, unloaded);
43  T tsum = unloaded[0];
44  for(int i=1; i<64; ++i) {
45  tsum += unloaded[i] ;
46  }
47  return tsum;
48  }
49 };
50 
53 template <typename T>
54 struct SumReg<eInstructionSet::eIS_Avx512, T,
55  typename boost::enable_if_c<sizeof(T) == 2>::type>
56 {
57  static IPSDK_FORCEINLINE
58  T
59  act(const typename Avx512Type<T>::Type& in)
60  {
61  T unloaded[32];
62  UnloadReg<eInstructionSet::eIS_Avx512, T>::act(in, unloaded);
63  T tsum = unloaded[0];
64  for(int i=1; i<32; ++i) {
65  tsum += unloaded[i];
66  }
67  return tsum;
68  }
69 };*/
70 
71 // SumReg specialization for the AVX-512 instruction set and ipInt32 elements:
72 // act() returns the sum of all the elements held in the register `in`.
73 template <>
74 struct SumReg<eInstructionSet::eIS_Avx512, ipInt32>
75 {
76  static IPSDK_FORCEINLINE
77  ipInt32
78  act(const Avx512Type<ipInt32>::Type& in)
79  {
80  return _mm512_reduce_add_epi32(in);
81  }
82 };
83 
84 // SumReg specialization for the AVX-512 instruction set and ipUInt32 elements:
85 // act() returns the sum of all the elements held in the register `in`.
86 template <>
87 struct SumReg<eInstructionSet::eIS_Avx512, ipUInt32>
88 {
89  static IPSDK_FORCEINLINE
90  ipUInt32
91  act(const Avx512Type<ipUInt32>::Type& in)
92  {
93  // the intrinsic returns a signed int; make the conversion to ipUInt32 explicit
94  return static_cast<ipUInt32>(_mm512_reduce_add_epi32(in));
95  }
96 };
96 
97 // SumReg specialization for the AVX-512 instruction set and ipInt64 elements:
98 // act() returns the sum of all the elements held in the register `in`.
99 template <>
100 struct SumReg<eInstructionSet::eIS_Avx512, ipInt64>
101 {
102  static IPSDK_FORCEINLINE
103  ipInt64
104  act(const Avx512Type<ipInt64>::Type& in)
105  {
106  return _mm512_reduce_add_epi64(in);
107  }
108 };
109 
110 // SumReg specialization for the AVX-512 instruction set and ipUInt64 elements:
111 // act() returns the sum of all the elements held in the register `in`.
112 template <>
113 struct SumReg<eInstructionSet::eIS_Avx512, ipUInt64>
114 {
115  static IPSDK_FORCEINLINE
116  ipUInt64
117  act(const Avx512Type<ipUInt64>::Type& in)
118  {
119  // the intrinsic returns a signed 64-bit value; make the conversion to ipUInt64 explicit
120  return static_cast<ipUInt64>(_mm512_reduce_add_epi64(in));
121  }
122 };
122 
123 // SumReg specialization for the AVX-512 instruction set and ipReal32 elements:
124 // act() returns the sum of all the elements held in the register `in`.
125 template <>
126 struct SumReg<eInstructionSet::eIS_Avx512, ipReal32>
127 {
128  static IPSDK_FORCEINLINE
129  ipReal32
130  act(const Avx512Type<ipReal32>::Type& in)
131  {
132  return _mm512_reduce_add_ps(in);
133  }
134 };
135 
136 // SumReg specialization for the AVX-512 instruction set and ipReal64 elements:
137 // act() returns the sum of all the elements held in the register `in`.
138 template <>
139 struct SumReg<eInstructionSet::eIS_Avx512, ipReal64>
140 {
141  static IPSDK_FORCEINLINE
142  ipReal64
143  act(const Avx512Type<ipReal64>::Type& in)
144  {
145  return _mm512_reduce_add_pd(in);
146  }
147 };
148 
151 
152 } // end of namespace reduction
153 } // end of namespace detail
154 } // end of namespace simd
155 } // end of namespace ipsdk
156 
157 #endif // __IPSDKUTIL_INSTRUCTIONSET_REDUCTION_DETAIL_AVX512_SUMREG_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
cast function; casts a Pack<instructionSet, TIn> to a Pack<instructionSet, TOut>
uint64_t ipUInt64
Base types definition.
Definition: BaseTypes.h:55
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
int32_t ipInt32
Base types definition.
Definition: BaseTypes.h:52
(including foundation and byte and word instructions)
Definition: InstructionSetTypes.h:51
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
template structure, specialized to implement the computation of the sum of the elements loaded in a r...
Definition of import/export macro for library.
int64_t ipInt64
Base types definition.
Definition: BaseTypes.h:54
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53
structure used to retrieve AVX512 type associated to a base type
Definition: Avx512Types.h:36