I just made a small test and increased the ref count of each string in the 
array at the beginning and decreased it at the end. The result is, that the 
time of array variant decreased from 15sec to 2 sec on my PC.
    
    
    import random
    import hashes
    import times
    import math
    import bitops
    import strutils
    
    template masked(i: untyped): typed {.dirty.}=
      i and m.hi
    
    type
      Entry[K, V] = object
        k: K # key
        v: V # value
        p, n: int # previous, next
      
      Map[K, V] = object
        hv: seq[int] # hash value
        d: seq[Entry[K, V]] # data
        first, last: int
        numentries: int
        hi: int
    
    proc hnz[K](x: K): Hash =
      result = hash($x)
      if result == 0:
        result = -1
    
    proc newTable[K, V](initialCapacity: Natural; maxFillRate: range[10 .. 95] 
= 50): Map[K, V] =
      ## fillrate is the desired maximum fill of hash buffer in percent, value 
should
      ## be in the range 10 to 95%. Small value gives optimal performance.
      ## if capacity is a power of 2, then we start with a buffer of that size
      ## otherwise we create a initial buffer which can contain capacity 
elements
      ## taking into acount the desired maximum fill rate
      ## whenever actual fill rate is larger than desired value, buffer is 
increased
      ## by factor of two. Actual buffer size is always a power of two.
      # we may care a bit for arithmetic overflow on pure 32 bit: cap = 1e8, 
fillrate 100%
      var h = initialCapacity
      if countSetBits(h) > 1:
        if h < 1e7.int: # accuracy, but prevent overflow on 32 bit
          h = h * 100 div maxFillrate
        else:
          h = h div maxFillrate * 100
      # if h < 8: h = 8 # tiny values makes no real sense and may generate 
trouble
      h = nextPowerOfTwo(h)
      #result.data = newSeq[Entry[K, V]](result.hi)
      #dec(result.hi)
      result.numentries = 0
      #result.fillrate = maxFillrate
      #if result.hi < 1e7.int:
      #  result.cfr = result.hi * fillrate div 100
      #else:
      #  result.cfr = result.hi div 100 * fillrate
      #assert(result.cfr < result.hi)
      
      result.d = newSeq[Entry[K, V]](h)
      result.hv = newSeq[int](h)
      result.hi = h - 1
    
    proc insert[K, V](m: var Map[K, V]; key: K; val: V) =
      let h = hnz(key)
      let p = masked(h)
      var i = p
      var j = p
      while m.hv[masked(i)] != 0:
        inc(i)
      while true:
        j = i
        i = masked(i - 1)
        if i - masked(m.hv[i]) < i - p:
          m.hv[j] = m.hv[i]
          m.d[j] = m.d[i]
        else:
          break
      m.hv[j] = h
      m.d[j].v = val
      m.d[j].k = key
      m.d[j].n = m.first
      m.first = j
    
    proc contains[K, V](m: Map[K, V]; key: K): bool =
      let h = hnz(key)
      let p = masked(h)
      var i = p
      while true:
        let hv = m.hv[i]
        if hv == h:
          return m.d[i].k == key
        elif hv == 0 or masked(hv) > p:
          return false
        i = masked(i + 1)
    
    proc get[K, V](m: Map[K, V]; key: K): V =
      let h = hnz(key)
      let p = masked(h)
      var i = p
      while true:
        let hv = m.hv[i]
        if hv == h and m.d[i].k == key:
          return m.d[i].v
        elif hv == 0 or masked(hv) > p:
          when compiles($key):
            raise newException(KeyError, "key not found: " & $key)
          else:
            raise newException(KeyError, "key not found")
        i = masked(i + 1)
    
    proc test() =
      const
        N = 1024 * 32
        N2 = N div 2
      var m = newTable[string, int](N)
      var a: array[N2, string]
      #var a = newSeq[string](N2)
      for i in 0 .. a.high: a[i] = $i
      # increase the ref count of each string in the array
      for i in 0 .. a.high: GC_ref(a[i])
      a.shuffle
      
      #echo "aaa"
      for i in 0 .. a.len div 2 - 1:
        m.insert(a[i], parseInt(a[i]))
      #echo "bbb"
      for i in 0 .. a.len div 2 - 1:
        assert(m.contains(a[i]))
        assert(m.get(a[i]) == parseInt(a[i]))
      #echo "ccc"
      for i in a.len div 2 .. a.high:
        assert(not m.contains(a[i]))
      #GC_disable()
      m = newTable[string, int](N)
      var t = cpuTime()
      for p, v in a:
        m.insert(v, p)
      echo((cpuTime() - t) / a.len.float)
      #echo "ddd"
      t = cpuTime()
      for p, v in a:
        doassert(m.get(v) >= 0)
      echo((cpuTime() - t) / a.len.float)
      # decrease it
      for i in 0 .. a.high: GC_unref(a[i])
    
    when isMainModule:
      test()
    

Reply via email to