15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX512_UNIFORMRANDOMLCGREG_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX512_UNIFORMRANDOMLCGREG_H__ 21 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/AddReg.h> 24 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/FloorReg.h> 25 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/SignedUnsignedOpReg.h> 32 #include <boost/type_traits/make_unsigned.hpp> 53 return _mm512_set_epi32(seed, seed+1, seed, seed+1,
54 seed, seed+1, seed, seed+1,
55 seed, seed+1, seed, seed+1,
56 seed, seed+1, seed, seed+1);
67 const FloatType fMultiplier =
68 (
static_cast<FloatType
>(nMax) - static_cast<FloatType>(nMin) + 1.0) /
70 return AssignRegFloat::act(fMultiplier);
79 return AssignRegFloat::act(static_cast<FloatType>(nMin));
88 const ipUInt32 mult[16] = { 214013, 17405, 214013, 69069, 1664525, 22695477, 1103515245, 134775813, 214013, 17405, 214013, 69069, 1664525, 22695477, 1103515245, 134775813 };
89 const ipUInt32 gadd[16] = { 2531011, 10395331, 13737667, 1, 1013904223, 1, 12345, 1, 2531011, 10395331, 13737667, 1, 1013904223, 1, 12345, 1 };
93 const __m512i multiplier = _mm512_loadu_si512((__m512i*) mult);
94 const __m512i adder = _mm512_loadu_si512((__m512i*) gadd);
111 const RegInt32 rMask7FFF = AssignRegInt32::act(0x7FFF);
112 const RegInt32 rMask3 = AssignRegInt32::act(3);
115 RegInt32 r1 = _mm512_srli_epi32(rSeed, 16);
116 r1 = BitwiseAndRegInt32::act(r1, rMask7FFF);
119 RegInt32 r2 = _mm512_srli_epi32(rSeed, 16);
120 r2 = BitwiseAndRegInt32::act(r2, rMask7FFF);
123 RegInt32 r3 = _mm512_srli_epi32(rSeed, 16);
124 r3 = BitwiseAndRegInt32::act(r3, rMask7FFF);
126 RegInt32 rRandVal = AddRegInt32::act(_mm512_slli_epi32(r1, 17),
127 _mm512_slli_epi32(r2, 2));
128 rRandVal = AddRegInt32::act(rRandVal,
129 BitwiseAndRegInt32::act(r3, rMask3));
147 RegInt32 rRandVal = computeRandom32bits(rSeed);
148 const RegFloat rUIntHlfRangeF = AssignRegFloat::act(2147483648.0);
150 RegFloat rRandValF1 =
151 _mm512_cvtepi32_pd(_mm512_castsi512_si256(rRandVal));
152 rRandValF1 = AddRegFloat::act(rRandValF1, rUIntHlfRangeF);
153 rRandValF1 = MulRegFloat::act(rRandValF1, rRangeMultiplier);
155 rRandValF1 = FloorRegFloat::act(rRandValF1);
156 rRandValF1 = AddRegFloat::act(rRandValF1, rRangeOffset);
158 const __m512i duplicateHiMask = _mm512_setr_epi32(
159 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
161 RegFloat rRandValF2 = _mm512_cvtepi32_pd(
162 _mm512_castsi512_si256(_mm512_permutex2var_epi32(
163 rRandVal, duplicateHiMask, rRandVal)));
164 rRandValF2 = AddRegFloat::act(rRandValF2, rUIntHlfRangeF);
165 rRandValF2 = MulRegFloat::act(rRandValF2, rRangeMultiplier);
167 rRandValF2 = FloorRegFloat::act(rRandValF2);
168 rRandValF2 = AddRegFloat::act(rRandValF2, rRangeOffset);
171 RegInt32 rRet = _mm512_castsi256_si512(_mm512_cvtpd_epi32(rRandValF1));
172 rRet = _mm512_inserti64x4(
173 rRet, _mm512_cvtpd_epi32(rRandValF2), 1);
202 const FloatType fMultiplier =
203 (
static_cast<FloatType
>(nMax) - static_cast<FloatType>(nMin) + 1.0) /
205 return AssignRegFloat::act(fMultiplier);
214 return AssignRegFloat::act(static_cast<FloatType>(nMin) - 2147483648.0);
237 const RegInt32 rRandInt32 =
243 const RegInt32 rMinInt32 =
246 const RegUInt32 rRet =
257 template <
typename T>
259 typename boost::enable_if_c<
260 boost::is_integral<T>::value
277 computeRangeMultiplier(T tMin, T tMax)
282 const FloatType fMultiplier =
283 (
static_cast<FloatType
>(tMax) - static_cast<FloatType>(tMin) + 1.0f) /
285 return AssignRegFloat::act(fMultiplier);
291 computeRangeOffset(T tMin, T tMax)
295 return AssignRegFloat::act(static_cast<FloatType>(tMin));
310 computeRand16BitsOnInt32Vals(
325 const RegInt32 rMask7FFF = AssignRegInt32::act(0x7FFF);
326 const RegInt32 rMask1 = AssignRegInt32::act(1);
329 RegInt32 r1 = _mm512_srli_epi32(rSeed, 16);
330 r1 = BitwiseAndRegInt32::act(r1, rMask7FFF);
333 RegInt32 r2 = _mm512_srli_epi32(rSeed, 16);
334 r2 = BitwiseAndRegInt32::act(r2, rMask7FFF);
337 RegInt32 r3 = _mm512_srli_epi32(rSeed, 16);
338 r3 = BitwiseAndRegInt32::act(r3, rMask7FFF);
340 RegFloat rRandValF = _mm512_cvtepi32_ps(
341 AddRegInt32::act(_mm512_slli_epi32(r1, 1), BitwiseAndRegInt32::act(r2, rMask1)));
343 rRandValF = MulRegFloat::act(rRandValF, rRangeMultiplier);
345 rRandValF = _mm512_cvtepi32_ps(_mm512_cvttps_epi32(rRandValF));
346 rRandValF = AddRegFloat::act(rRandValF, rRangeOffset);
347 rVal1 = _mm512_cvtps_epi32(rRandValF);
349 rRandValF = _mm512_cvtepi32_ps(
351 _mm512_slli_epi32(r3, 1),
352 BitwiseAndRegInt32::act(_mm512_srli_epi32(r2, 1), rMask1)));
353 rRandValF = MulRegFloat::act(rRandValF, rRangeMultiplier);
355 rRandValF = _mm512_cvtepi32_ps(_mm512_cvttps_epi32(rRandValF));
356 rRandValF = AddRegFloat::act(rRandValF, rRangeOffset);
357 rVal2 = _mm512_cvtps_epi32(rRandValF);
370 RegInt32 rRandVal1, rRandVal2;
371 computeRand16BitsOnInt32Vals(
387 template <
typename T>
389 typename boost::enable_if_c<
390 boost::is_integral<T>::value
407 computeRangeMultiplier(T tMin, T tMax)
413 const FloatType fMultiplier =
414 (
static_cast<FloatType
>(tMax) - static_cast<FloatType>(tMin) + 1.0f) /
416 return AssignRegFloat::act(fMultiplier);
422 computeRangeOffset(T tMin, T tMax)
426 return AssignRegFloat::act(static_cast<FloatType>(tMin));
441 computeRandomOn32BitsVal(
454 const RegInt32 rMask = AssignRegInt32::act(0x7FFF);
457 RegFloat rRandValF = _mm512_cvtepi32_ps(
458 BitwiseAndRegInt32::act(_mm512_srli_epi32(rSeed, 16), rMask));
459 rRandValF = MulRegFloat::act(rRandValF, rRangeMultiplier);
460 rRandValF = FloorRegFloat::act(rRandValF);
461 rRandValF = AddRegFloat::act(rRandValF, rRangeOffset);
462 return _mm512_cvtps_epi32(rRandValF);
483 const RegInt32 rRandVal1 = computeRandomOn32BitsVal(rRangeMultiplier, rRangeOffset, rSeed);
484 const RegInt32 rRandVal2 = computeRandomOn32BitsVal(rRangeMultiplier, rRangeOffset, rSeed);
485 const RegInt32 rRandVal3 = computeRandomOn32BitsVal(rRangeMultiplier, rRangeOffset, rSeed);
486 const RegInt32 rRandVal4 = computeRandomOn32BitsVal(rRangeMultiplier, rRangeOffset, rSeed);
490 _mm512_packs_epi32(rRandVal1, rRandVal2),
491 _mm512_packs_epi32(rRandVal3, rRandVal4),
520 const FloatType fMultiplier =
521 (
static_cast<FloatType
>(fMax) - static_cast<FloatType>(fMin)) /
523 return AssignRegFloat::act(fMultiplier);
532 return AssignRegFloat::act(static_cast<FloatType>(fMin));
558 const RegInt32 rRandVal =
560 const RegFloat rUIntHlfRangeF = AssignRegFloat::act(2147483648.0);
562 RegFloat rRandValF1 =
563 _mm512_cvtepi32_pd(_mm512_castsi512_si256(rRandVal));
564 rRandValF1 = AddRegFloat::act(rRandValF1, rUIntHlfRangeF);
565 rRandValF1 = MulRegFloat::act(rRandValF1, rRangeMultiplier);
566 rRandValF1 = AddRegFloat::act(rRandValF1, rRangeOffset);
568 const __m512i duplicateHiMask = _mm512_setr_epi32(
569 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
571 RegFloat rRandValF2 = _mm512_cvtepi32_pd(
572 _mm512_castsi512_si256(_mm512_permutex2var_epi32(
573 rRandVal, duplicateHiMask, rRandVal)));
574 rRandValF2 = AddRegFloat::act(rRandValF2, rUIntHlfRangeF);
575 rRandValF2 = MulRegFloat::act(rRandValF2, rRangeMultiplier);
576 rRandValF2 = AddRegFloat::act(rRandValF2, rRangeOffset);
578 RegFloat32 rRet = _mm512_shuffle_f32x4(
579 _mm512_castps256_ps512(_mm512_cvtpd_ps(rRandValF1)),
580 _mm512_castps256_ps512(_mm512_cvtpd_ps(rRandValF2)),
593 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_AVX512_UNIFORMRANDOMLCGREG_H__
Defines the IPSDK_FORCEINLINE macro.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
template structure which is specialized to implement the computation of value rounded to closest even...
Definition: FloorReg.h:36
int32_t ipInt32
Base types definition.
Definition: BaseTypes.h:52
(including foundation and byte and word instructions)
Definition: InstructionSetTypes.h:51
Definition: NumericLimits.h:27
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Definition: UniformRandomLCGReg.h:29
Definition of the import/export macro for the library.
template structure which is specialized to implement the arithmetic addition on 2 scalars or 2 regist...
Definition: AddReg.h:37
Definition: SignedUnsignedOpReg.h:51
Definition: BitwiseAndReg.h:30
Definition: AssignRegDecl.h:31
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53