IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
AssignRegImpl.h
Go to the documentation of this file.
1 // AssignRegImpl.h:
3 // ------------
4 //
14 
15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_ASSIGNREGIMPL_H__
16 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_ASSIGNREGIMPL_H__
17 
20 
21 namespace ipsdk {
22 namespace simd {
23 namespace detail {
24 
27 
28 template <typename T>
29 IPSDK_FORCEINLINE
30 typename AvxType<T>::Type
31 AssignReg<eInstructionSet::eIS_Avx2, T,
32  typename boost::enable_if_c<sizeof(T) == 1>::type>::act(const T& value)
33 {
34  return _mm256_set1_epi8(value);
35 }
36 
37 template <typename T>
38 IPSDK_FORCEINLINE
39 void
40 AssignReg<eInstructionSet::eIS_Avx2, T,
41  typename boost::enable_if_c<sizeof(T) == 1>::type>::act(typename AvxType<T>::Type& reg, const T& value)
42 {
43  reg = _mm256_set1_epi8(value);
44 }
45 
46 template <typename T>
47 IPSDK_FORCEINLINE
48 typename AvxType<T>::Type
49 AssignReg<eInstructionSet::eIS_Avx2, T,
50  typename boost::enable_if_c<sizeof(T) == 2>::type>::act(const T& value)
51 {
52  return _mm256_set1_epi16(value);
53 }
54 
55 template <typename T>
56 IPSDK_FORCEINLINE
57 void
58 AssignReg<eInstructionSet::eIS_Avx2, T,
59  typename boost::enable_if_c<sizeof(T) == 2>::type>::act(typename AvxType<T>::Type& reg, const T& value)
60 {
61  reg = _mm256_set1_epi16(value);
62 }
63 
64 template <typename T>
65 IPSDK_FORCEINLINE
66 typename AvxType<T>::Type
67 AssignReg<eInstructionSet::eIS_Avx2, T,
68  typename boost::enable_if_c<boost::is_integral<T>::value
69  && sizeof(T) == 4>::type>::act(const T& value)
70 {
71  return _mm256_set1_epi32(value);
72 }
73 
74 template <typename T>
75 IPSDK_FORCEINLINE
76 void
77 AssignReg<eInstructionSet::eIS_Avx2, T,
78  typename boost::enable_if_c<boost::is_integral<T>::value
79  && sizeof(T) == 4>::type>::act(typename AvxType<T>::Type& reg, const T& value)
80 {
81  reg = _mm256_set1_epi32(value);
82 }
83 
84 template <typename T>
85 IPSDK_FORCEINLINE
86 typename AvxType<T>::Type
87 AssignReg<eInstructionSet::eIS_Avx2, T,
88  typename boost::enable_if_c<boost::is_integral<T>::value
89  && sizeof(T) == 8>::type>::act(const T& value)
90 {
91  // there's no instruction such as "_mm256_set1_epi32" for 64 bits integers,
92  // so we use a work-around...
93  T values[4];
94  values[0] = values[1] = values[2] = values[3] = value;
95  return _mm256_loadu_si256(
96  reinterpret_cast<const typename AvxType<T>::Type*>(values));
97 }
98 
99 template <typename T>
100 IPSDK_FORCEINLINE
101 void
102 AssignReg<eInstructionSet::eIS_Avx2, T,
103  typename boost::enable_if_c<boost::is_integral<T>::value
104  && sizeof(T) == 8>::type>::act(typename AvxType<T>::Type& reg, const T& value)
105 {
106  // there's no instruction such as "_mm256_set1_epi32" for 64 bits integers,
107  // so we use a work-around...
108  T values[4];
109  values[0] = values[1] = values[2] = values[3] = value;
110  reg = _mm256_loadu_si256(
111  reinterpret_cast<const typename AvxType<T>::Type*>(values));
112 }
113 
114 // load implementation for AVX2 for real32 types
115 IPSDK_FORCEINLINE
116 AvxType<ipReal32>::Type
117 AssignReg<eInstructionSet::eIS_Avx2, ipReal32>::act(const ipReal32& value)
118 {
119  return AssignReg<eInstructionSet::eIS_Avx, ipReal32>::act(value);
120 }
121 
122 IPSDK_FORCEINLINE
123 void
124 AssignReg<eInstructionSet::eIS_Avx2, ipReal32>::act(AvxType<ipReal32>::Type& reg, const ipReal32& value)
125 {
126  AssignReg<eInstructionSet::eIS_Avx, ipReal32>::act(reg, value);
127 }
128 
129 // load implementation for AVX2 for real64 types
130 IPSDK_FORCEINLINE
131 AvxType<ipReal64>::Type
132 AssignReg<eInstructionSet::eIS_Avx2, ipReal64>::act(const ipReal64& value)
133 {
134  return AssignReg<eInstructionSet::eIS_Avx, ipReal64>::act(value);
135 }
136 
137 IPSDK_FORCEINLINE
138 void
139 AssignReg<eInstructionSet::eIS_Avx2, ipReal64>::act(AvxType<ipReal64>::Type& reg, const ipReal64& value)
140 {
141  AssignReg<eInstructionSet::eIS_Avx, ipReal64>::act(reg, value);
142 }
143 
146 
147 } // end of namespace detail
148 } // end of namespace simd
149 } // end of namespace ipsdk
150 
152 
153 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX2_ASSIGNREGIMPL_H__
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
Advanced Vector Extensions 2.
Definition: InstructionSetTypes.h:48
float ipReal32
Base types definition.
Definition: BaseTypes.h:56