This may get someone started on a PR or at least get @shirleyquirk
a-benchmarkin':
import random # Lemire2018 https://r-libre.teluq.ca/1437/
proc mul(a, b: uint64; hi, lo: var uint64) =
# Store hi & low parts of full product a*b;
# Note VM & JS branches need doing
when defined(gcc)or defined(llvm_gcc)or defined(clang):
{.emit: """__uint128_t r = `a`; r *= `b`;
*`hi` = r >> 64; *`lo` = r;""".}
elif defined(windows) and not defined(tcc):
proc umul128(a, b: uint64, c: ptr uint64): uint64
{.importc: "_umul128", header: "intrin.h".}
lo = umul128(a, b, addr hi)
# else: some fallback impl using narrower arithmetic
proc randExclusive*(r: var Rand; s: uint64): uint64 =
## Sample on range [0, s) given [0, 2^64) rng.
var x = r.next
var lo, hi: uint64
mul(x, s, hi, lo)
if lo < s:
let t = (not s) mod s # not s == -s in 2's compl
while lo < t:
x = r.next
mul(x, s, hi, lo)
hi
proc rand*(r: var Rand; max: Natural): int =
## Much faster than same in `random` module.
int(r.randExclusive uint64(max + 1))
when isMainModule:
var sum = 0
for trial in 1 .. 1_000_000_000:
sum += randState.rand(10)
echo sum
Run
The paper says the Go stdlib uses this method, FWIW.