This may get someone started on a PR or at least get @shirleyquirk 
a-benchmarkin': 
    
    
    import random # Lemire2018 https://r-libre.teluq.ca/1437/
    
    proc mul(a, b: uint64; hi, lo: var uint64) =
      # Store hi & low parts of full product a*b;
      # Note VM & JS branches need doing
      when defined(gcc)or defined(llvm_gcc)or defined(clang):
        {.emit: """__uint128_t r = `a`; r *= `b`;
                   *`hi` = r >> 64; *`lo` = r;""".}
      elif defined(windows) and not defined(tcc):
        proc umul128(a, b: uint64, c: ptr uint64): uint64
          {.importc: "_umul128", header: "intrin.h".}
        lo = umul128(a, b, addr hi)
    # else: some fallback impl using narrower arithmetic
    
    proc randExclusive*(r: var Rand; s: uint64): uint64 =
      ## Sample on range [0, s) given [0, 2^64) rng.
      var x = r.next
      var lo, hi: uint64
      mul(x, s, hi, lo)
      if lo < s:
        let t = (not s) mod s # not s == -s in 2's compl
        while lo < t:
          x = r.next
          mul(x, s, hi, lo)
      hi
    
    proc rand*(r: var Rand; max: Natural): int =
      ## Much faster than same in `random` module.
      int(r.randExclusive uint64(max + 1))
    
    when isMainModule:
      var sum = 0
      for trial in 1 .. 1_000_000_000:
        sum += randState.rand(10)
      echo sum
    
    
    Run

The paper says the Go stdlib uses this method, FWIW.

Reply via email to