15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX2_SCANREG_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX2_SCANREG_H__ 19 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/ScanReg.h> 29 static IPSDK_FORCEINLINE
36 static IPSDK_FORCEINLINE
48 static IPSDK_FORCEINLINE
55 static IPSDK_FORCEINLINE
62 t0 = _mm256_permute4x64_pd(in, 0x90);
63 t1 = _mm256_permute2f128_pd(t0, t0, 41);
65 t2 = _mm256_blend_pd(t0, t1, 0x8);
66 out = _mm256_add_pd(in, _mm256_blend_pd(t0, t1, 0x1));
69 t1 = _mm256_permute2f128_pd(out, out, 41);
70 out = _mm256_add_pd(out, t1);
82 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX2_SCANREG_H__ Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Template structure specialized to implement the cumulative sum on 2 registers, depending on the instruction set in use and on the types of the buffers loaded into the registers.
Definition: ScanReg.h:36
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
eInstructionSet
Enumeration describing processor instruction sets.
Definition: InstructionSetTypes.h:31
Advanced Vector Extensions 2.
Definition: InstructionSetTypes.h:48
Definition of import/export macro for library.
Structure used to retrieve the AVX type associated with a base type.
Definition: AvxTypes.h:33
float ipReal32
Base types definition.
Definition: BaseTypes.h:56