IPSDK  4_1_0_2
IPSDK : Image Processing Software Development Kit
CastPack.h
1 // CastPack.h:
3 // ------------
4 //
15 
16 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX_CASTPACK_H__
17 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX_CASTPACK_H__
18 
26 
27 #include <boost/type_traits/is_signed.hpp>
28 
29 namespace ipsdk {
30 namespace simd {
31 namespace detail {
32 
35 
38 template <typename TIn, typename TOut>
39 struct CastPack<eInstructionSet::eIS_Avx, TIn, TOut,
40  typename boost::enable_if_c<boost::is_same<TIn, TOut>::value>::type>
41 {
42  static IPSDK_FORCEINLINE BasePack<ePackType::ePT_Avx, TOut>
43  act(const BasePack<ePackType::ePT_Avx, TIn>& in)
44  {
45  return in;
46  }
47 
48  static IPSDK_FORCEINLINE
49  void
50  act(const BasePack<ePackType::ePT_Avx, TIn>& in,
51  BasePack<ePackType::ePT_Avx, TOut> & out)
52  {
53  typedef typename AvxType<TIn>::Type RegType;
54  arrayop::copyArray<sizeof(TIn), RegType>(in._val, out._val);
55  }
56 
57  static IPSDK_FORCEINLINE
58  void
59  act(const BaseMaskPack<ePackType::ePT_Avx, TIn>& inMask,
60  BasePack<ePackType::ePT_Avx, TOut> & out)
61  {
62  typedef typename AvxType<TIn>::Type RegType;
63  arrayop::copyArray<sizeof(TIn), RegType>(inMask._val, out._val);
64  }
65 };
66 
69 template <>
70 struct CastPack<eInstructionSet::eIS_Avx, ipReal32, ipReal64>
71 {
72  static IPSDK_FORCEINLINE BasePack<ePackType::ePT_Avx, ipReal64>
73  act(const BasePack<ePackType::ePT_Avx, ipReal32>& in)
74  {
75  BasePack<ePackType::ePT_Avx, ipReal64> out;
76 
77  out._val[0] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[0], 0));
78  out._val[1] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[0], 1));
79 
80  out._val[2] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[1], 0));
81  out._val[3] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[1], 1));
82 
83  out._val[4] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[2], 0));
84  out._val[5] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[2], 1));
85 
86  out._val[6] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[3], 0));
87  out._val[7] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[3], 1));
88 
89  return out;
90  }
91 
92  static IPSDK_FORCEINLINE
93  void
94  act(const BasePack<ePackType::ePT_Avx, ipReal32>& in,
95  BasePack<ePackType::ePT_Avx, ipReal64>& out)
96  {
97  out._val[0] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[0], 0));
98  out._val[1] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[0], 1));
99 
100  out._val[2] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[1], 0));
101  out._val[3] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[1], 1));
102 
103  out._val[4] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[2], 0));
104  out._val[5] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[2], 1));
105 
106  out._val[6] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[3], 0));
107  out._val[7] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[3], 1));
108  }
109 };
110 
113 template <>
114 struct CastPack<eInstructionSet::eIS_Avx, ipReal64, ipReal32>
115 {
116  static IPSDK_FORCEINLINE BasePack<ePackType::ePT_Avx, ipReal32>
117  act(const BasePack<ePackType::ePT_Avx, ipReal64>& in)
118  {
119  BasePack<ePackType::ePT_Avx, ipReal32> out;
120 
121  out._val[0] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[0]));
122  out._val[0] = _mm256_insertf128_ps(out._val[0],
123  _mm256_cvtpd_ps(in._val[1]), 1);
124 
125  out._val[1] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[2]));
126  out._val[1] = _mm256_insertf128_ps(out._val[0],
127  _mm256_cvtpd_ps(in._val[3]), 1);
128 
129  out._val[2] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[4]));
130  out._val[2] = _mm256_insertf128_ps(out._val[0],
131  _mm256_cvtpd_ps(in._val[5]), 1);
132 
133  out._val[3] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[6]));
134  out._val[3] = _mm256_insertf128_ps(out._val[0],
135  _mm256_cvtpd_ps(in._val[7]), 1);
136 
137  return out;
138  }
139 
140  static IPSDK_FORCEINLINE
141  void
142  act(const BasePack<ePackType::ePT_Avx, ipReal64>& in,
143  BasePack<ePackType::ePT_Avx, ipReal32>& out)
144  {
145  out._val[0] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[0]));
146  out._val[0] = _mm256_insertf128_ps(out._val[0],
147  _mm256_cvtpd_ps(in._val[1]), 1);
148 
149  out._val[1] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[2]));
150  out._val[1] = _mm256_insertf128_ps(out._val[1],
151  _mm256_cvtpd_ps(in._val[3]), 1);
152 
153  out._val[2] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[4]));
154  out._val[2] = _mm256_insertf128_ps(out._val[2],
155  _mm256_cvtpd_ps(in._val[5]), 1);
156 
157  out._val[3] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[6]));
158  out._val[3] = _mm256_insertf128_ps(out._val[3],
159  _mm256_cvtpd_ps(in._val[7]), 1);
160  }
161 };
162 
165 
166 } // end of namespace detail
167 } // end of namespace simd
168 } // end of namespace ipsdk
169 
170 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX_CASTPACK_H__
171 
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
BaseMaskPack class; defines a set of masks; the number of masks in this set depends on the type of th...
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
utility functions for array operations
BasePack class; defines a set of scalars (for instruction set "standard") or registers (for all other...
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Advanced Vector Extensions.
Definition: InstructionSetTypes.h:44
Definition of import/export macro for library.
float ipReal32
Base types definition.
Definition: BaseTypes.h:56