15 #ifndef __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_SSE2_UNIFORMRANDOMLCGREG_H__ 16 #define __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_SSE2_UNIFORMRANDOMLCGREG_H__ 21 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/AddReg.h> 24 #include <IPSDKUtil/InstructionSet/Arithmetic/detail/SignedUnsignedOpReg.h> 31 #include <boost/type_traits/make_unsigned.hpp> 52 return _mm_set_epi32(seed, seed+1, seed, seed+1);
63 const FloatType fMultiplier =
64 (
static_cast<FloatType
>(nMax) - static_cast<FloatType>(nMin) + 1.0) /
66 return AssignRegFloat::act(fMultiplier);
75 return AssignRegFloat::act(static_cast<FloatType>(nMin));
84 const ipUInt32 mult[4] = { 214013, 17405, 214013, 69069 };
85 const ipUInt32 gadd[4] = { 2531011, 10395331, 13737667, 1 };
88 const __m128i multiplier = _mm_loadu_si128((__m128i*) mult);
89 const __m128i adder = _mm_loadu_si128((__m128i*) gadd);
106 const RegInt32 rMask7FFF = AssignRegInt32::act(0x7FFF);
107 const RegInt32 rMask3 = AssignRegInt32::act(3);
110 RegInt32 r1 = _mm_srli_epi32(rSeed, 16);
111 r1 = BitwiseAndRegInt32::act(r1, rMask7FFF);
114 RegInt32 r2 = _mm_srli_epi32(rSeed, 16);
115 r2 = BitwiseAndRegInt32::act(r2, rMask7FFF);
118 RegInt32 r3 = _mm_srli_epi32(rSeed, 16);
119 r3 = BitwiseAndRegInt32::act(r3, rMask7FFF);
121 RegInt32 rRandVal = AddRegInt32::act(_mm_slli_epi32(r1, 17),
122 _mm_slli_epi32(r2, 2));
123 rRandVal = AddRegInt32::act(rRandVal,
124 BitwiseAndRegInt32::act(r3, rMask3));
144 RegInt32 rRandVal = computeRandom32bits(rSeed);
145 const RegFloat rUIntHlfRangeF = AssignRegFloat::act(2147483648.0);
147 RegFloat rRandValF1 = _mm_cvtepi32_pd(rRandVal);
148 rRandValF1 = AddRegFloat::act(rRandValF1, rUIntHlfRangeF);
149 rRandValF1 = MulRegFloat::act(rRandValF1, rRangeMultiplier);
154 const RegFloat rMaxInt32F = AssignRegFloat::act(2147483647.0);
155 RegFloat rGTMaxInt32F = IsGreaterRegFloat::act(rRandValF1, rMaxInt32F);
156 rRandValF1 = BitwiseSelectRegFloat::act(rGTMaxInt32F,
157 AddRegFloat::act(_mm_cvtepi32_pd(_mm_cvttpd_epi32(SubRegFloat::act(rRandValF1, rMaxInt32F))), rMaxInt32F),
158 _mm_cvtepi32_pd(_mm_cvttpd_epi32(rRandValF1)));
160 rRandValF1 = AddRegFloat::act(rRandValF1, rRangeOffset);
162 RegFloat rRandValF2 = _mm_cvtepi32_pd(_mm_srli_si128(rRandVal, 8));
163 rRandValF2 = AddRegFloat::act(rRandValF2, rUIntHlfRangeF);
164 rRandValF2 = MulRegFloat::act(rRandValF2, rRangeMultiplier);
166 rGTMaxInt32F = IsGreaterRegFloat::act(rRandValF2, rMaxInt32F);
167 rRandValF2 = BitwiseSelectRegFloat::act(rGTMaxInt32F,
168 AddRegFloat::act(_mm_cvtepi32_pd(_mm_cvttpd_epi32(SubRegFloat::act(rRandValF2, rMaxInt32F))), rMaxInt32F),
169 _mm_cvtepi32_pd(_mm_cvttpd_epi32(rRandValF2)));
170 rRandValF2 = AddRegFloat::act(rRandValF2, rRangeOffset);
172 RegInt32 rRandVal1 = _mm_cvtpd_epi32(rRandValF1);
173 RegInt32 rRandVal2 = _mm_cvtpd_epi32(rRandValF2);
175 __m128 rRandVal1F32 = _mm_castsi128_ps(rRandVal1);
176 __m128 rRandVal2F32 = _mm_castsi128_ps(rRandVal2);
177 __m128 rRandValF32 = _mm_shuffle_ps(rRandVal1F32, rRandVal2F32, _MM_SHUFFLE(1, 0, 1, 0));
178 return _mm_castps_si128(rRandValF32);
205 const FloatType fMultiplier =
206 (
static_cast<FloatType
>(nMax) - static_cast<FloatType>(nMin) + 1.0) /
208 return AssignRegFloat::act(fMultiplier);
217 return AssignRegFloat::act(static_cast<FloatType>(nMin) - 2147483648.0);
240 const RegInt32 rRandInt32 =
246 const RegInt32 rMinInt32 =
250 rRandInt32, rMinInt32);
257 template <
typename T>
259 typename boost::enable_if_c<
260 boost::is_integral<T>::value
277 computeRangeMultiplier(T tMin, T tMax)
282 const FloatType fMultiplier =
283 (
static_cast<FloatType
>(tMax) - static_cast<FloatType>(tMin) + 1.0f) /
285 return AssignRegFloat::act(fMultiplier);
291 computeRangeOffset(T tMin, T tMax)
295 return AssignRegFloat::act(static_cast<FloatType>(tMin));
310 computeRand16BitsOnInt32Vals(
325 const RegInt32 rMask7FFF = AssignRegInt32::act(0x7FFF);
326 const RegInt32 rMask1 = AssignRegInt32::act(1);
329 RegInt32 r1 = _mm_srli_epi32(rSeed, 16);
330 r1 = BitwiseAndRegInt32::act(r1, rMask7FFF);
333 RegInt32 r2 = _mm_srli_epi32(rSeed, 16);
334 r2 = BitwiseAndRegInt32::act(r2, rMask7FFF);
337 RegInt32 r3 = _mm_srli_epi32(rSeed, 16);
338 r3 = BitwiseAndRegInt32::act(r3, rMask7FFF);
340 RegFloat rRandValF = _mm_cvtepi32_ps(
341 AddRegInt32::act(_mm_slli_epi32(r1, 1), BitwiseAndRegInt32::act(r2, rMask1)));
343 rRandValF = MulRegFloat::act(rRandValF, rRangeMultiplier);
345 rRandValF = _mm_cvtepi32_ps(_mm_cvttps_epi32(rRandValF));
346 rRandValF = AddRegFloat::act(rRandValF, rRangeOffset);
347 rVal1 = _mm_cvtps_epi32(rRandValF);
349 rRandValF = _mm_cvtepi32_ps(
351 _mm_slli_epi32(r3, 1),
352 BitwiseAndRegInt32::act(_mm_srli_epi32(r2, 1), rMask1)));
353 rRandValF = MulRegFloat::act(rRandValF, rRangeMultiplier);
355 rRandValF = _mm_cvtepi32_ps(_mm_cvttps_epi32(rRandValF));
356 rRandValF = AddRegFloat::act(rRandValF, rRangeOffset);
357 rVal2 = _mm_cvtps_epi32(rRandValF);
370 RegInt32 rRandVal1, rRandVal2;
371 computeRand16BitsOnInt32Vals(
388 template <
typename T>
390 typename boost::enable_if_c<
391 boost::is_integral<T>::value
408 computeRangeMultiplier(T tMin, T tMax)
414 const FloatType fMultiplier =
415 (
static_cast<FloatType
>(tMax) - static_cast<FloatType>(tMin) + 1.0f) /
417 return AssignRegFloat::act(fMultiplier);
423 computeRangeOffset(T tMin, T tMax)
427 return AssignRegFloat::act(static_cast<FloatType>(tMin));
442 computeRandomOn32BitsVal(
454 const RegInt32 rMask = AssignRegInt32::act(0x7FFF);
457 RegFloat rRandValF = _mm_cvtepi32_ps(
458 BitwiseAndRegInt32::act(_mm_srli_epi32(rSeed, 16), rMask));
459 rRandValF = MulRegFloat::act(rRandValF, rRangeMultiplier);
461 rRandValF = _mm_cvtepi32_ps(_mm_cvttps_epi32(rRandValF));
462 rRandValF = AddRegFloat::act(rRandValF, rRangeOffset);
463 return _mm_cvtps_epi32(rRandValF);
484 const RegInt32 rRandVal1 = computeRandomOn32BitsVal(rRangeMultiplier, rRangeOffset, rSeed);
485 const RegInt32 rRandVal2 = computeRandomOn32BitsVal(rRangeMultiplier, rRangeOffset, rSeed);
486 const RegInt32 rRandVal3 = computeRandomOn32BitsVal(rRangeMultiplier, rRangeOffset, rSeed);
487 const RegInt32 rRandVal4 = computeRandomOn32BitsVal(rRangeMultiplier, rRangeOffset, rSeed);
491 _mm_packs_epi32(rRandVal1, rRandVal2),
492 _mm_packs_epi32(rRandVal3, rRandVal4),
521 const FloatType fMultiplier =
522 (
static_cast<FloatType
>(fMax) - static_cast<FloatType>(fMin)) /
524 return AssignRegFloat::act(fMultiplier);
533 return AssignRegFloat::act(static_cast<FloatType>(fMin));
558 const RegInt32 rRandVal =
560 const RegFloat rUIntHlfRangeF = AssignRegFloat::act(2147483648.0);
562 RegFloat rRandValF1 = _mm_cvtepi32_pd(rRandVal);
563 rRandValF1 = AddRegFloat::act(rRandValF1, rUIntHlfRangeF);
564 rRandValF1 = MulRegFloat::act(rRandValF1, rRangeMultiplier);
565 rRandValF1 = AddRegFloat::act(rRandValF1, rRangeOffset);
567 RegFloat rRandValF2 = _mm_cvtepi32_pd(_mm_srli_si128(rRandVal, 8));
568 rRandValF2 = AddRegFloat::act(rRandValF2, rUIntHlfRangeF);
569 rRandValF2 = MulRegFloat::act(rRandValF2, rRangeMultiplier);
570 rRandValF2 = AddRegFloat::act(rRandValF2, rRangeOffset);
572 return _mm_shuffle_ps(
573 _mm_cvtpd_ps(rRandValF1), _mm_cvtpd_ps(rRandValF2),
574 _MM_SHUFFLE(1, 0, 1, 0));
585 #endif // __IPSDKUTIL_INSTRUCTIONSET_ARITHMETIC_DETAIL_SSE2_UNIFORMRANDOMLCGREG_H__
Defines the IPSDK_FORCEINLINE.
Main namespace for IPSDK library.
Definition: AlgorithmFunctionEfficiency.h:22
Definition: IsGreaterRegDecl.h:30
double ipReal64
Base types definition.
Definition: BaseTypes.h:57
int32_t ipInt32
Base types definition.
Definition: BaseTypes.h:52
Definition: NumericLimits.h:27
Definition: BitwiseSelectReg.h:30
eInstructionSet
Enumerate for processor instruction set description.
Definition: InstructionSetTypes.h:31
Definition: UniformRandomLCGReg.h:29
Definition of import/export macro for library.
template structure which is specialized to implement the arithmetic addition on 2 scalars or 2 regist...
Definition: AddReg.h:37
Streaming SIMD Extensions 2.
Definition: InstructionSetTypes.h:36
Definition: SignedUnsignedOpReg.h:51
Definition: BitwiseAndReg.h:30
Definition: AssignRegDecl.h:31
float ipReal32
Base types definition.
Definition: BaseTypes.h:56
uint32_t ipUInt32
Base types definition.
Definition: BaseTypes.h:53