15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX_SCANREG_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX_SCANREG_H__ 19 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/ScanReg.h> 32 static IPSDK_FORCEINLINE
41 static IPSDK_FORCEINLINE
48 t0 = _mm256_permute_ps(in, _MM_SHUFFLE(2, 1, 0, 3));
49 t1 = _mm256_permute2f128_ps(t0, t0, 41);
50 out = _mm256_add_ps(in, _mm256_blend_ps(t0, t1, 0x11));
52 t0 = _mm256_permute_ps(out, _MM_SHUFFLE(1, 0, 3, 2));
53 t1 = _mm256_permute2f128_ps(t0, t0, 41);
54 out = _mm256_add_ps(out, _mm256_blend_ps(t0, t1, 0x33));
56 out = _mm256_add_ps(out,_mm256_permute2f128_ps(out, out, 41));
63 static IPSDK_FORCEINLINE
72 static IPSDK_FORCEINLINE
93 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX_SCANREG_H__
Defines the IPSDK_FORCEINLINE macro.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Template structure specialized to implement the cumulative sum on 2 registers, depending on the instruction set in use and on the types of the buffers loaded in the registers.
Definition: ScanReg.h:36
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
Predefined types for Avx instruction set management.
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Advanced Vector Extensions.
Definition: InstructionSetTypes.h:44
Definition of import/export macro for library.
Structure used to retrieve the AVX type associated with a base type.
Definition: AvxTypes.h:33
Definition: UnloadReg.h:30
Definition: LoadRegDecl.h:30
float ipReal32
Base types definition.
Definition: BaseTypes.h:56