15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX512_SCANREG_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX512_SCANREG_H__ 20 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/ScanReg.h> 30 static IPSDK_FORCEINLINE
39 static IPSDK_FORCEINLINE
// Single-precision scan steps over the 16 lanes of a 512-bit register.
// Each step builds t0 as a copy of the running vector moved up by 1, 2, 4 and
// then 8 lanes (the zero-mask clears the low lanes that have no source lane)
// and adds it to the running vector.
// NOTE(review): the enclosing signature and the declarations of t0/out are not
// visible in this listing; confirm against the full header.
// Step 1: t0[i] = in[i-1] for lanes 1..15, lane 0 cleared by mask 0xfffe.
46 t0 = _mm512_maskz_permutex2var_ps(0xfffe, in, _mm512_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14), in);
47 out = _mm512_add_ps(in, t0);
// Step 2: t0[i] = out[i-2] for lanes 2..15, lanes 0..1 cleared by mask 0xfffc.
50 t0 = _mm512_maskz_permutex2var_ps(0xfffc, out, _mm512_setr_epi32(0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13), out);
51 out = _mm512_add_ps(out, t0);
// Step 3: t0[i] = out[i-4] for lanes 4..15, lanes 0..3 cleared by mask 0xfff0.
54 t0 = _mm512_maskz_permutex2var_ps(0xfff0, out, _mm512_setr_epi32(0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), out);
55 out = _mm512_add_ps(out, t0);
// Step 4: t0[i] = out[i-8] for lanes 8..15, lanes 0..7 cleared by mask 0xff00.
58 t0 = _mm512_maskz_permutex2var_ps(0xff00, out, _mm512_setr_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7), out);
// After this add, lane i of out holds in[0] + ... + in[i], provided the lines
// omitted from this listing do not modify t0/out in between.
59 out = _mm512_add_ps(out, t0);
66 static IPSDK_FORCEINLINE
75 static IPSDK_FORCEINLINE
// Double-precision scan steps over the 8 lanes of a 512-bit register.
// Each step builds t0 as a copy of the running vector moved up by 1, 2 and
// then 4 lanes (the zero-mask clears the low lanes that have no source lane)
// and adds it to the running vector.
// NOTE(review): the enclosing signature and the declarations of t0/out are not
// visible in this listing; confirm against the full header.
// Step 1: t0[i] = in[i-1] for lanes 1..7, lane 0 cleared by mask 0xfe.
82 t0 = _mm512_maskz_permutex2var_pd(0xfe, in, _mm512_setr_epi64(0, 0, 1, 2, 3, 4, 5, 6), in);
83 out = _mm512_add_pd(in, t0);
// Step 2: t0[i] = out[i-2] for lanes 2..7, lanes 0..1 cleared by mask 0xfc.
86 t0 = _mm512_maskz_permutex2var_pd(0xfc, out, _mm512_setr_epi64(0, 0, 0, 1, 2, 3, 4, 5), out);
87 out = _mm512_add_pd(out, t0);
// Step 3: t0[i] = out[i-4] for lanes 4..7, lanes 0..3 cleared by mask 0xf0.
90 t0 = _mm512_maskz_permutex2var_pd(0xf0, out, _mm512_setr_epi64(0, 0, 0, 0, 0, 1, 2, 3), out);
// After this add, lane i of out holds in[0] + ... + in[i], provided the lines
// omitted from this listing do not modify t0/out in between.
91 out = _mm512_add_pd(out, t0);
102 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX512_SCANREG_H__ Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Template structure specialized to implement the cumulative sum on 2 registers, depending on the instruction set in use and on the types of the buffers loaded into the registers.
Definition: ScanReg.h:36
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
(including foundation and byte and word instructions)
Definition: InstructionSetTypes.h:51
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Predefined types for Avx512 instruction set management.
Definition of import/export macro for library.
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
structure used to retrieve AVX512 type associated to a base type
Definition: Avx512Types.h:36