15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX512_SCANPACK_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX512_SCANPACK_H__ 38 static IPSDK_FORCEINLINE
45 static IPSDK_FORCEINLINE
57 static IPSDK_FORCEINLINE
60 __m512 regOffset = _mm512_setzero_ps();
68 for (; i < nbAlignedElts; i += simd::NbEltsPerReg<ePackType::ePT_Avx512, ipReal32>::Value) {
73 regOut = _mm512_add_ps(regOut, regOffset);
75 regOffset = _mm512_permutex2var_ps(regOut, _mm512_set1_epi32(15), regOut);
77 for (; i < nbElts; ++i) {
80 out[i] = out[i - 1] + in[i];
88 static IPSDK_FORCEINLINE
91 __m512d regOffset = _mm512_setzero_pd();
99 for (; i < nbAlignedElts; i += simd::NbEltsPerReg<ePackType::ePT_Avx512, ipReal64>::Value) {
104 regOut = _mm512_add_pd(regOut, regOffset);
106 regOffset = _mm512_permutex2var_pd(regOut, _mm512_set1_epi64(63), regOut);
108 for (; i < nbElts; ++i) {
111 out[i] = out[i - 1] + in[i];
122 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX512_SCANPACK_H__
Defines the IPSDK_FORCEINLINE macro.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Template structure specialized to implement the cumulative sum on two registers, depending on the instruction set in use and on the types of the buffers loaded into the registers.
Definition: ScanReg.h:36
uint64_t ipUInt64
Base types definition.
Definition: BaseTypes.h:55
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
(including foundation and byte-and-word instructions)
Definition: InstructionSetTypes.h:51
Definition: NbEltsPerReg.h:28
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Definition: ScanPack.h:35
Definition of import/export macro for library.
structure containing intrinsic registers used to store vectorized data
Definition: BaseReg.h:29
Definition: UnloadReg.h:30
Definition: LoadRegDecl.h:30
Definition: ScanPack.h:30
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
structure containing intrinsic registers used to store vectorized data
Definition: BasePackDecl.h:29