IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
ScanReg.h
Go to the documentation of this file.
1 // ScanReg.h:
3 // -------------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX_SCANREG_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX_SCANREG_H__
17 
19 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/ScanReg.h>
24 
25 namespace ipsdk {
26 namespace simd {
27 namespace detail {
28 
// Explicit specialization operating on AvxType<ipReal32>::Type registers
// (handled as __m256 below, i.e. 8 packed single-precision floats).
// It computes an inclusive cumulative sum of the 8 elements of `in`:
// out[i] = in[0] + in[1] + ... + in[i].
//
// NOTE(review): this listing skips original lines 30, 33 and 44, so the
// struct header, the return type of the first act() overload and the
// declaration of the `out` parameter are not visible here — confirm them
// against the original header.
29 template <>
31 {
// Convenience overload: runs the two-argument act() on a local register
// and returns that register by value.
32  static IPSDK_FORCEINLINE
34  act(const AvxType<ipReal32>::Type& in)
35  {
36  __m256 out;
37  act(in, out);
38  return out;
39  }
40 
// Main overload: writes the inclusive cumulative sum of `in` into `out`.
// The scan runs in three shift-and-add stages (shift by 1, 2, then 4
// elements), each shift being zero-filled.
//
// In every stage _mm256_permute2f128_ps(x, x, 41) builds a register whose
// low 128-bit lane is zero and whose high lane is the low lane of x
// (41 = 0x29: bit 3 zeroes the low lane, bits 5:4 = 2 select the low lane
// of the second operand for the high lane). This carries elements across
// the 128-bit lane boundary, which _mm256_permute_ps cannot do.
41  static IPSDK_FORCEINLINE
42  void
43  act(const AvxType<ipReal32>::Type& in,
45  {
46  __m256 t0, t1;
47  // stage 1: add `in` shifted by one element (element i receives element i-1, element 0 receives 0)
// t0 rotates each 128-bit lane by one element: [a3,a0,a1,a2 | a7,a4,a5,a6]
48  t0 = _mm256_permute_ps(in, _MM_SHUFFLE(2, 1, 0, 3));
// t1 = [0,0,0,0 | a3,a0,a1,a2]
49  t1 = _mm256_permute2f128_ps(t0, t0, 41);
// mask 0x11 takes elements 0 and 4 from t1: [0,a0,a1,a2 | a3,a4,a5,a6]
50  out = _mm256_add_ps(in, _mm256_blend_ps(t0, t1, 0x11));
51  // stage 2: add the partial sums shifted by two elements (zero-filled)
// t0 swaps the two halves of each 128-bit lane: [o2,o3,o0,o1 | o6,o7,o4,o5]
52  t0 = _mm256_permute_ps(out, _MM_SHUFFLE(1, 0, 3, 2));
53  t1 = _mm256_permute2f128_ps(t0, t0, 41);
// mask 0x33 takes elements 0,1,4,5 from t1: [0,0,o0,o1 | o2,o3,o4,o5]
54  out = _mm256_add_ps(out, _mm256_blend_ps(t0, t1, 0x33));
55  // stage 3: add the partial sums shifted by four elements, i.e. one whole 128-bit lane (zero-filled)
56  out = _mm256_add_ps(out,_mm256_permute2f128_ps(out, out, 41));
57  }
58 };
59 
// Explicit specialization operating on AvxType<ipReal64>::Type registers
// (handled as __m256d below, i.e. 4 packed double-precision values).
// The visible statements accumulate a running sum over a 4-element scalar
// buffer rather than using shuffle intrinsics.
//
// NOTE(review): this listing skips original lines 61, 64, 75, 78 and 82,
// so the struct header, the return type of the first act() overload, the
// declaration of the `out` parameter and two statements of the second
// overload are not visible here — confirm them against the original header.
60 template <>
62 {
// Convenience overload: runs the two-argument act() on a local register
// and returns that register by value.
63  static IPSDK_FORCEINLINE
65  act(const AvxType<ipReal64>::Type& in)
66  {
67  __m256d out;
68  act(in, out);
69  return out;
70  }
71 
// Main overload: scalar running sum over the four elements.
72  static IPSDK_FORCEINLINE
73  void
74  act(const AvxType<ipReal64>::Type& in,
76  {
// Scratch buffer for the four double-precision elements.
77  ipReal64 pIn[4];
// NOTE(review): original line 78 is missing from this listing; presumably
// it stores the contents of `in` into pIn — TODO confirm.
// In-place inclusive running sum: afterwards pIn[i] holds the sum of the
// initial pIn[0..i]. Each statement depends on the previous one, so the
// order matters.
79  pIn[1] += pIn[0];
80  pIn[2] += pIn[1];
81  pIn[3] += pIn[2];
// NOTE(review): original line 82 is missing from this listing; presumably
// it loads pIn back into `out` — TODO confirm.
83  }
84 };
85 
88 
89 } // end of namespace detail
90 } // end of namespace simd
91 } // end of namespace ipsdk
92 
93 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX_SCANREG_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Template structure specialized to implement the cumulative sum on 2 registers, depending on the instruction set in use and on the type of the buffers loaded into the registers.
Definition: ScanReg.h:36
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
Predefined types for Avx instruction set management.
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Advanced Vector Extensions.
Definition: InstructionSetTypes.h:44
Definition of import/export macro for library.
Structure used to retrieve the AVX type associated with a base type.
Definition: AvxTypes.h:33
Definition: UnloadReg.h:30
Definition: LoadRegDecl.h:30
float ipReal32
Base types definition.
Definition: BaseTypes.h:56