IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
SumPack.h
Go to the documentation of this file.
1 // SumPack.h:
3 // -------------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_REDUCTION_DETAIL_AVX2_SUMPACK_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_REDUCTION_DETAIL_AVX2_SUMPACK_H__
17 
22 
23 namespace ipsdk {
24 namespace simd {
25 namespace detail {
26 namespace reduction {
27 
30 
// Partial specialization selected (via boost::enable_if_c) for integral T
// with sizeof(T) == 1.
// NOTE(review): the `struct ...<` header line (listing line 34) and the
// Pack16 / AddR typedefs (listing lines 42-43) are not present in this
// listing; per the reference notes at the end of the page the struct is
// presumably SumPack for eInstructionSet::eIS_Avx2 -- confirm in the header.
33 template <typename T>
35  typename boost::enable_if_c<boost::is_integral<T>::value
36  && sizeof(T) == 1>::type>
37 {
 // Reduces the registers of `in` to one scalar of type UpperType<T>::Type.
38  static IPSDK_FORCEINLINE
39  typename UpperType<T>::Type
40  act(const BasePack<ePackType::ePT_Avx, T>& in)
41  {
 // Convert the input pack to its UpperType<T>::Type representation before
 // accumulating.
44  Pack16 in16 = cast<eInstructionSet::eIS_Avx2, typename UpperType<T>::Type>(in);
45 
 // Fold register 1 into register 0.
 // Assumes the converted pack holds exactly two registers -- TODO confirm.
46  in16._val[0] = AddR::act(in16._val[0], in16._val[1]);
 // _mm256_hadd_epi16 adds adjacent 16-bit pairs inside each 128-bit lane;
 // three rounds with both operands equal leave each lane's total in every
 // 16-bit slot of that lane.
47  in16._val[0] = _mm256_hadd_epi16(in16._val[0], in16._val[0]);
48  in16._val[0] = _mm256_hadd_epi16(in16._val[0], in16._val[0]);
49  in16._val[0] = _mm256_hadd_epi16(in16._val[0], in16._val[0]);
50 
 // Spill the register to memory with an unaligned store; slot 0 belongs to
 // the low lane and slot 8 to the high lane, so their sum is the result.
51  ipInt16 tab[16];
52  _mm256_storeu_si256(reinterpret_cast<typename AvxType<T>::Type*>(tab), in16._val[0]);
53  return static_cast<typename UpperType<T>::Type>(tab[0] + tab[8]);
54  }
55 };
56 
// Partial specialization selected (via boost::enable_if_c) for integral T
// with sizeof(T) == 2.
// NOTE(review): the `struct ...<` header line (listing line 60) and the
// Pack32 / AddR typedefs (listing lines 68-69) are not present in this
// listing; presumably SumPack for eInstructionSet::eIS_Avx2 -- confirm.
59 template <typename T>
61  typename boost::enable_if_c<boost::is_integral<T>::value
62  && sizeof(T) == 2>::type>
63 {
 // Reduces the registers of `in` to one scalar of type UpperType<T>::Type.
64  static IPSDK_FORCEINLINE
65  typename UpperType<T>::Type
66  act(const BasePack<ePackType::ePT_Avx, T>& in)
67  {
70 
 // Convert the input pack to its UpperType<T>::Type representation before
 // accumulating.
71  Pack32 in32 =
72  cast<eInstructionSet::eIS_Avx2, typename UpperType<T>::Type>(in);
73 
 // Fold registers 1..3 into register 0.
 // Assumes the converted pack holds exactly four registers -- TODO confirm.
74  in32._val[0] = AddR::act(in32._val[0], in32._val[1]);
75  in32._val[0] = AddR::act(in32._val[0], in32._val[2]);
76  in32._val[0] = AddR::act(in32._val[0], in32._val[3]);
77 
 // _mm256_hadd_epi32 adds adjacent 32-bit pairs inside each 128-bit lane;
 // two rounds with both operands equal leave each lane's total in every
 // 32-bit slot of that lane.
78  in32._val[0] = _mm256_hadd_epi32(in32._val[0], in32._val[0]);
79  in32._val[0] = _mm256_hadd_epi32(in32._val[0], in32._val[0]);
80 
 // The 256-bit store writes only the first 8 of the 16 ipInt32 slots.
 // Slot 0 belongs to the low lane and slot 4 to the high lane.
81  ipInt32 tab[16];
82  _mm256_storeu_si256(reinterpret_cast<typename AvxType<T>::Type*>(tab),
83  in32._val[0]);
84 
85  return static_cast<typename UpperType<T>::Type>(tab[0] + tab[4]);
86  }
87 };
88 
// Partial specialization selected (via boost::enable_if_c) for integral T
// with sizeof(T) == 4.
// NOTE(review): the `struct ...<` header line (listing line 92) and the body
// of act() (listing lines 100-101) are not present in this listing, so the
// reduction strategy for this size cannot be determined from here -- consult
// the actual header.
91 template <typename T>
93  typename boost::enable_if_c<boost::is_integral<T>::value
94  && sizeof(T) == 4>::type>
95 {
 // Declared to return UpperType<T>::Type for a BasePack<ePT_Avx, T> input.
96  static IPSDK_FORCEINLINE
97  typename UpperType<T>::Type
98  act(const BasePack<ePackType::ePT_Avx, T>& in)
99  {
102  }
103 };
104 
// Partial specialization selected (via boost::enable_if_c) for integral T
// with sizeof(T) == 8.
// NOTE(review): the `struct ...<` header line (listing line 108) and the
// declaration of `res` (listing line 116) are not present in this listing.
107 template <typename T>
109  typename boost::enable_if_c<boost::is_integral<T>::value
110  && sizeof(T) == 8>::type>
111 {
 // Reduces in._val[0..7] to one scalar; no widening cast is applied here,
 // the 64-bit lanes are added directly.
112  static IPSDK_FORCEINLINE
113  typename UpperType<T>::Type
114  act(const BasePack<ePackType::ePT_Avx, T>& in)
115  {
117 
 // Element-wise 64-bit accumulation of the eight input registers.
118  res._val[0] = _mm256_add_epi64(in._val[0], in._val[1]);
119  res._val[0] = _mm256_add_epi64(res._val[0], in._val[2]);
120  res._val[0] = _mm256_add_epi64(res._val[0], in._val[3]);
121  res._val[0] = _mm256_add_epi64(res._val[0], in._val[4]);
122  res._val[0] = _mm256_add_epi64(res._val[0], in._val[5]);
123  res._val[0] = _mm256_add_epi64(res._val[0], in._val[6]);
124  res._val[0] = _mm256_add_epi64(res._val[0], in._val[7]);
125 
 // _mm256_srli_si256 shifts each 128-bit lane right by 8 bytes, so this add
 // puts the sum of both 64-bit elements of a lane into the lane's low slot.
126  res._val[0] =
127  _mm256_add_epi64(res._val[0], _mm256_srli_si256(res._val[0], 8));
128 
 // The 256-bit store writes only the first 4 of the 16 ipInt64 slots.
 // Slot 0 is the low-lane total and slot 2 the high-lane total.
129  ipInt64 tab[16];
130  _mm256_storeu_si256(
131  reinterpret_cast<typename AvxType<T>::Type*>(tab), res._val[0]);
132 
 // NOTE(review): this last addition is done on ipInt64 (signed) scalars,
 // unlike the SIMD adds above -- confirm this is intended for unsigned T.
133  return tab[0] + tab[2];
134  }
135 };
136 
// Full specialization working on double-precision registers (_mm256_*_pd).
// NOTE(review): the `struct ...<...>` header, the signature of act() and the
// declarations of `res` and `tab` (listing lines 140, 143-144, 146, 155) are
// not present in this listing; presumably this is the ipReal64 specialization
// for eInstructionSet::eIS_Avx2 -- confirm in the header.
139 template <>
141 {
142  static IPSDK_FORCEINLINE
145  {
 // Element-wise accumulation of in._val[0..7] into a single register.
147  res._val[0] = _mm256_add_pd(in._val[0], in._val[1]);
148  res._val[0] = _mm256_add_pd(res._val[0], in._val[2]);
149  res._val[0] = _mm256_add_pd(res._val[0], in._val[3]);
150  res._val[0] = _mm256_add_pd(res._val[0], in._val[4]);
151  res._val[0] = _mm256_add_pd(res._val[0], in._val[5]);
152  res._val[0] = _mm256_add_pd(res._val[0], in._val[6]);
153  res._val[0] = _mm256_add_pd(res._val[0], in._val[7]);
154 
 // Unaligned store of the four doubles, then a scalar sum of all four.
156  _mm256_storeu_pd(tab, res._val[0]);
157 
158  return tab[0] + tab[1] + tab[2] + tab[3];
159  }
160 };
161 
// Full specialization whose visible part converts `in` to an ipReal64 pack.
// NOTE(review): the `struct ...<...>` header, the signature of act() and the
// start of the return statement (listing lines 165, 168-169, 171) are not
// present in this listing. Presumably this is the ipReal32 specialization
// forwarding to the ipReal64 one -- confirm in the header.
164 template <>
166 {
167  static IPSDK_FORCEINLINE
170  {
 // Only the last argument of the elided call is visible: the input pack
 // cast to ipReal64 for the AVX2 instruction set.
172  cast<eInstructionSet::eIS_Avx2, ipReal64>(in));
173  }
174 };
175 
178 
179 } // end of namespace reduction
180 } // end of namespace detail
181 } // end of namespace simd
182 } // end of namespace ipsdk
183 
184 #endif // __IPSDKUTIL_INSTRUCTIONSET_REDUCTION_DETAIL_AVX2_SUMPACK_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
cast function; casts a Pack<instructionSet, TIn> to a Pack<instructionSet, TOut>
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
int32_t ipInt32
Base types definition.
Definition: BaseTypes.h:52
int16_t ipInt16
Base types definition.
Definition: BaseTypes.h:50
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
template structure UpperType<typename T>; its typedef Type gives the type immediately above T...
Definition: UpperType.h:42
Advanced Vector Extensions 2.
Definition: InstructionSetTypes.h:48
structure SumPack<eInstructionSet::domain instructionSet, typename T, typename Enable=void> ...
Definition: SumPack.h:40
IPSDK_FORCEINLINE ipsdk::simd::IS2Pack< instructionSet, TOut >::Type cast(const PackIn &in)
casts from PackIn type to PackOut type
Definition: cast.h:34
Definition of import/export macro for library.
template structure which is specialized to implement the arithmetic addition on 2 scalars or 2 regist...
Definition: AddReg.h:37
specialization of ipsdk::simd::detail::AddReg structure for AVX2 instruction set
int64_t ipInt64
Base types definition.
Definition: BaseTypes.h:54
structure used to retrieve AVX type associated to a base type
Definition: AvxTypes.h:33
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
structure containing intrinsic registers used to store vectorized data
Definition: BasePackDecl.h:29