16 #ifndef __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX_CASTPACK_H__ 17 #define __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX_CASTPACK_H__ 27 #include <boost/type_traits/is_signed.hpp> 38 template <
typename TIn,
typename TOut>
40 typename boost::enable_if_c<boost::is_same<TIn, TOut>::value>::type>
// Same-type cast, by-value form: takes an AVX pack of TIn and yields an AVX
// pack of TOut.
// NOTE(review): the body of this overload is not present in this listing.
// The enable_if condition just above requires TIn and TOut to be the same
// type, so presumably the input pack is returned unchanged -- confirm
// against the original header.
42 static IPSDK_FORCEINLINE BasePack<ePackType::ePT_Avx, TOut>
43 act(
const BasePack<ePackType::ePT_Avx, TIn>& in)
48 static IPSDK_FORCEINLINE
50 act(
const BasePack<ePackType::ePT_Avx, TIn>& in,
51 BasePack<ePackType::ePT_Avx, TOut> & out)
53 typedef typename AvxType<TIn>::Type RegType;
54 arrayop::copyArray<sizeof(TIn), RegType>(in._val, out._val);
57 static IPSDK_FORCEINLINE
59 act(
const BaseMaskPack<ePackType::ePT_Avx, TIn>& inMask,
60 BasePack<ePackType::ePT_Avx, TOut> & out)
62 typedef typename AvxType<TIn>::Type RegType;
63 arrayop::copyArray<sizeof(TIn), RegType>(inMask._val, out._val);
72 static IPSDK_FORCEINLINE BasePack<ePackType::ePT_Avx, ipReal64>
73 act(
const BasePack<ePackType::ePT_Avx, ipReal32>& in)
75 BasePack<ePackType::ePT_Avx, ipReal64> out;
77 out._val[0] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[0], 0));
78 out._val[1] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[0], 1));
80 out._val[2] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[1], 0));
81 out._val[3] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[1], 1));
83 out._val[4] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[2], 0));
84 out._val[5] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[2], 1));
86 out._val[6] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[3], 0));
87 out._val[7] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[3], 1));
92 static IPSDK_FORCEINLINE
94 act(
const BasePack<ePackType::ePT_Avx, ipReal32>& in,
95 BasePack<ePackType::ePT_Avx, ipReal64>& out)
97 out._val[0] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[0], 0));
98 out._val[1] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[0], 1));
100 out._val[2] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[1], 0));
101 out._val[3] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[1], 1));
103 out._val[4] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[2], 0));
104 out._val[5] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[2], 1));
106 out._val[6] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[3], 0));
107 out._val[7] = _mm256_cvtps_pd(_mm256_extractf128_ps(in._val[3], 1));
116 static IPSDK_FORCEINLINE BasePack<ePackType::ePT_Avx, ipReal32>
117 act(
const BasePack<ePackType::ePT_Avx, ipReal64>& in)
119 BasePack<ePackType::ePT_Avx, ipReal32> out;
121 out._val[0] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[0]));
122 out._val[0] = _mm256_insertf128_ps(out._val[0],
123 _mm256_cvtpd_ps(in._val[1]), 1);
125 out._val[1] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[2]));
126 out._val[1] = _mm256_insertf128_ps(out._val[0],
127 _mm256_cvtpd_ps(in._val[3]), 1);
129 out._val[2] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[4]));
130 out._val[2] = _mm256_insertf128_ps(out._val[0],
131 _mm256_cvtpd_ps(in._val[5]), 1);
133 out._val[3] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[6]));
134 out._val[3] = _mm256_insertf128_ps(out._val[0],
135 _mm256_cvtpd_ps(in._val[7]), 1);
140 static IPSDK_FORCEINLINE
142 act(
const BasePack<ePackType::ePT_Avx, ipReal64>& in,
143 BasePack<ePackType::ePT_Avx, ipReal32>& out)
145 out._val[0] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[0]));
146 out._val[0] = _mm256_insertf128_ps(out._val[0],
147 _mm256_cvtpd_ps(in._val[1]), 1);
149 out._val[1] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[2]));
150 out._val[1] = _mm256_insertf128_ps(out._val[1],
151 _mm256_cvtpd_ps(in._val[3]), 1);
153 out._val[2] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[4]));
154 out._val[2] = _mm256_insertf128_ps(out._val[2],
155 _mm256_cvtpd_ps(in._val[5]), 1);
157 out._val[3] = _mm256_castps128_ps256(_mm256_cvtpd_ps(in._val[6]));
158 out._val[3] = _mm256_insertf128_ps(out._val[3],
159 _mm256_cvtpd_ps(in._val[7]), 1);
170 #endif // __IPSDKUTIL_INSTRUCTIONSET_DETAIL_AVX_CASTPACK_H__ Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
BaseMaskPack class; defines a set of masks; the number of masks in this set depends on the type of th...
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
utility functions for array operations
BasePack class; defines a set of scalars (for instruction set "standard") or registers (for all other...
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Advanced Vector Extensions.
Definition: InstructionSetTypes.h:44
Definition of import/export macro for library.
float ipReal32
Base types definition.
Definition: BaseTypes.h:56