IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
SumPack.h
Go to the documentation of this file.
1 // SumPack.h:
3 // -------------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_REDUCTION_DETAIL_SSE2_SUMPACK_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_REDUCTION_DETAIL_SSE2_SUMPACK_H__
17 
23 
24 namespace ipsdk {
25 namespace simd {
26 namespace detail {
27 namespace reduction {
28 
31 
// Partial specialization enabled (through boost::enable_if_c) when T is an
// integral type with sizeof(T) == 1.
// NOTE(review): this listing skips doxygen line 35, so the `struct ...` header
// itself is not visible here -- presumably a SumPack specialization for the
// SSE2 instruction set; confirm against the real header.
34 template <typename T>
36  typename boost::enable_if_c<
37  boost::is_integral<T>::value
38  && sizeof(T) == 1
39  >::type
40  >
41 {
    // Reduces the pack `in` to a single value of type UpperType<T>::Type.
42  static IPSDK_FORCEINLINE
43  typename UpperType<T>::Type
44  act(const BasePack<ePackType::ePT_Sse, T>& in)
45  {
    // NOTE(review): doxygen lines 46-47 are absent from this listing; Pack16
    // and AddR are presumably typedefs declared there -- verify.
    // Convert the input pack to the upper type; the code below reads
    // _val[0] and _val[1] of the converted pack.
48  Pack16 p16 =
49  cast<eInstructionSet::eIS_Sse2, typename UpperType<T>::Type>(in);
50 
    // Combine the two registers, then fold _val[0] onto copies of itself
    // shifted right by 8, 4 and 2 bytes, so that (assuming AddR::act is a
    // lane-wise addition) the lowest 16-bit lane accumulates every lane.
51  p16._val[0] = AddR::act(p16._val[0], p16._val[1]);
52  p16._val[0] = AddR::act(p16._val[0], _mm_srli_si128(p16._val[0], 8));
53  p16._val[0] = AddR::act(p16._val[0], _mm_srli_si128(p16._val[0], 4));
54  p16._val[0] = AddR::act(p16._val[0], _mm_srli_si128(p16._val[0], 2));
55 
    // Return the lowest 16-bit lane, converted to the result type.
56  return static_cast<typename UpperType<T>::Type>(
57  _mm_extract_epi16(p16._val[0], 0));
58  }
59 };
60 
// Partial specialization enabled (through boost::enable_if_c) when T is an
// integral type with sizeof(T) == 2.
// NOTE(review): doxygen line 64 (the `struct ...` header) is missing from this
// listing -- presumably a SumPack specialization for SSE2; confirm.
63 template <typename T>
65  typename boost::enable_if_c<
66  boost::is_integral<T>::value
67  && sizeof(T) == 2
68  >::type
69  >
70 {
    // Reduces the pack `in` to a single value of type UpperType<T>::Type.
71  static IPSDK_FORCEINLINE
72  typename UpperType<T>::Type
73  act(const BasePack<ePackType::ePT_Sse, T>& in)
74  {
    // NOTE(review): doxygen lines 75-76 are absent; Pack32 and AddR are
    // presumably typedefs declared there -- verify.
77 
    // Convert the input pack to the upper type; the code below reads
    // _val[0] through _val[3] of the converted pack.
78  Pack32 in32 =
79  cast<eInstructionSet::eIS_Sse2, typename UpperType<T>::Type>(in);
80 
    // Accumulate the four registers into _val[0].
81  in32._val[0] = AddR::act(in32._val[0], in32._val[1]);
82  in32._val[0] = AddR::act(in32._val[0], in32._val[2]);
83  in32._val[0] = AddR::act(in32._val[0], in32._val[3]);
    // NOTE(review): a 16-byte right shift of a 128-bit register presumably
    // yields all zeros, which would make the next add a no-op -- confirm
    // against the Intel intrinsics documentation before removing it.
84  in32._val[0] = AddR::act(in32._val[0], _mm_srli_si128(in32._val[0], 16));
    // Fold by 8 then 4 bytes so the lowest 32-bit lane accumulates the others
    // (assuming AddR::act is a lane-wise addition).
85  in32._val[0] = AddR::act(in32._val[0], _mm_srli_si128(in32._val[0], 8));
86  in32._val[0] = AddR::act(in32._val[0], _mm_srli_si128(in32._val[0], 4));
87 
    // NOTE(review): doxygen line 89 (the start of the call that fills `tab`)
    // is missing; presumably it unloads in32._val[0] into `tab` -- verify.
88  typename UpperType<T>::Type tab[4];
90  tab);
91 
    // Only the first unloaded element is returned.
92  return tab[0];
93  }
94 };
95 
// Partial specialization enabled (through boost::enable_if_c) when T is an
// integral type with sizeof(T) == 4.
// NOTE(review): doxygen line 99 (the `struct ...` header) is missing from this
// listing -- presumably a SumPack specialization for SSE2; confirm.
98 template <typename T>
100  typename boost::enable_if_c<boost::is_integral<T>::value
101  && sizeof(T) == 4>::type>
102 {
    // Declared to return UpperType<T>::Type for the pack `in`.
    // NOTE(review): the function body (doxygen lines 107-108) is not present
    // in this listing, so its behavior cannot be documented from here.
103  static IPSDK_FORCEINLINE
104  typename UpperType<T>::Type
105  act(const BasePack<ePackType::ePT_Sse, T>& in)
106  {
109  }
110 };
111 
// Partial specialization enabled (through boost::enable_if_c) when T is an
// integral type with sizeof(T) == 8.
// NOTE(review): doxygen line 115 (the `struct ...` header) is missing from
// this listing -- presumably a SumPack specialization for SSE2; confirm.
114 template <typename T>
116  typename boost::enable_if_c<boost::is_integral<T>::value
117  && sizeof(T) == 8>::type>
118 {
    // Reduces the pack `in` to a single value; the declared return type is
    // UpperType<T>::Type.
119  static IPSDK_FORCEINLINE
120  typename UpperType<T>::Type
121  act(const BasePack<ePackType::ePT_Sse, T>& in)
122  {
    // NOTE(review): doxygen line 123 is absent; `res` is presumably declared
    // there -- verify.
    // Accumulate the eight input registers in._val[0..7] with 64-bit adds.
124  res._val[0] = _mm_add_epi64(in._val[0], in._val[1]);
125  res._val[0] = _mm_add_epi64(res._val[0], in._val[2]);
126  res._val[0] = _mm_add_epi64(res._val[0], in._val[3]);
127  res._val[0] = _mm_add_epi64(res._val[0], in._val[4]);
128  res._val[0] = _mm_add_epi64(res._val[0], in._val[5]);
129  res._val[0] = _mm_add_epi64(res._val[0], in._val[6]);
130  res._val[0] = _mm_add_epi64(res._val[0], in._val[7]);
131 
    // NOTE(review): byte shifts of 32 and 16 on a 128-bit register presumably
    // yield all zeros, which would make the next two adds no-ops -- confirm
    // against the Intel intrinsics documentation before removing them.
132  res._val[0] =
133  _mm_add_epi64(res._val[0], _mm_srli_si128(res._val[0], 32));
134  res._val[0] =
135  _mm_add_epi64(res._val[0], _mm_srli_si128(res._val[0], 16));
    // Shift by 8 bytes and add: the upper 64-bit lane is added onto the lower.
136  res._val[0] =
137  _mm_add_epi64(res._val[0], _mm_srli_si128(res._val[0], 8));
138 
    // NOTE(review): doxygen line 140 (the start of the call that fills `tab`)
    // is missing; presumably it unloads res._val[0] into `tab` -- verify.
139  T tab[2];
141  tab);
142 
    // tab[0] has type T and is converted to the declared return type on return.
143  return tab[0];
144  }
145 };
146 
// Full specialization (`template <>`).
// NOTE(review): doxygen lines 150, 153-154, 156 and 165 are missing from this
// listing, so the struct header, the signature of the function below and the
// declarations of `res` and `tab` are not visible. The _pd intrinsics suggest
// this is the double-precision (presumably ipReal64) case -- confirm.
149 template <>
151 {
152  static IPSDK_FORCEINLINE
155  {
    // Accumulate the eight input registers in._val[0..7] with packed-double
    // adds into res._val[0].
157  res._val[0] = _mm_add_pd(in._val[0], in._val[1]);
158  res._val[0] = _mm_add_pd(res._val[0], in._val[2]);
159  res._val[0] = _mm_add_pd(res._val[0], in._val[3]);
160  res._val[0] = _mm_add_pd(res._val[0], in._val[4]);
161  res._val[0] = _mm_add_pd(res._val[0], in._val[5]);
162  res._val[0] = _mm_add_pd(res._val[0], in._val[6]);
163  res._val[0] = _mm_add_pd(res._val[0], in._val[7]);
164 
    // Store the accumulated register to `tab` (unaligned store), then add the
    // two stored elements in scalar code to produce the result.
166  _mm_storeu_pd(tab, res._val[0]);
167 
168  return tab[0] + tab[1];
169  }
170 };
171 
// Full specialization (`template <>`).
// NOTE(review): doxygen lines 175, 178-179 and 181 are missing from this
// listing, so the struct header, the function signature and the start of the
// only statement are not visible. Presumably this is the ipReal32 case that
// forwards to the ipReal64 reduction -- confirm against the real header.
174 template <>
176 {
177  static IPSDK_FORCEINLINE
180  {
    // The visible tail of the statement converts `in` to an ipReal64 pack and
    // passes it as the argument of a call whose beginning is not shown.
182  cast<eInstructionSet::eIS_Sse2, ipReal64>(in));
183  }
184 };
185 
188 
189 } // end of namespace reduction
190 } // end of namespace detail
191 } // end of namespace simd
192 } // end of namespace ipsdk
193 
194 #endif // __IPSDKUTIL_INSTRUCTIONSET_REDUCTION_DETAIL_SSE2_SUMPACK_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
cast function; casts a Pack<instructionSet, TIn> to a Pack<instructionSet, TOut>
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
specialization of ipsdk::simd::detail::AddReg structure for SSE2 instruction set
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Template structure UpperType<typename T>; its typedef Type gives the next larger type above T...
Definition: UpperType.h:42
structure SumPack<eInstructionSet::domain instructionSet, typename T, typename Enable=void> ...
Definition: SumPack.h:40
IPSDK_FORCEINLINE ipsdk::simd::IS2Pack< instructionSet, TOut >::Type cast(const PackIn &in)
casts from PackIn type to PackOut type
Definition: cast.h:34
Definition of import/export macro for library.
Template structure specialized to implement arithmetic addition on two scalars or two registers.
Definition: AddReg.h:37
Streaming SIMD Extensions 2.
Definition: InstructionSetTypes.h:36
Definition: UnloadReg.h:30
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
structure containing intrinsic registers used to store vectorized data
Definition: BasePackDecl.h:29