Hi,

a program I'm working on uses hashes of vectors to organize data. The
vectors are the same length and need to be summed over the hash keys.
The obvious solution uses a double for loop: one for the keys of the
hash and one over the vector indices. Thinking about it I decided that
the most efficient ordering of those loops would be to do the loop over
vector indices inside of the key loop. That way there would be fewer
calls to hash-ref than the other way round(*). Since everybody is fond
of saying that intuition about performance is often wrong in a
surprising way I decided to get sure and do some measurements for fun
and glory.
My initial intuition turned out to be correct, but there was another
surprise. Writing (as an afterthought) the code in a more high-level way
boosted performance by a factor of almost 2! That is:

(define (sum-hash-of-vectors3 table keys size)
  (apply vector-map +
         (for/list ((k keys))
                   (hash-ref table k))))

is more performant than

(define (sum-hash-of-vectors1 table keys size)
  (define v (make-vector size 0))
  (for ((k keys))
       (let ((v_k (hash-ref table k)))
         (for ((i size))
              (vector-set!
               v i
               (+ (vector-ref v i)
                  (vector-ref v_k i))))))
  v)

This was quite a surprise and is due to (my guess) the quality of the
compiler?

Marijn


(*) I believe this is also the reason I chose to lay the data out this
way and not as a vector of hashes, so for completeness I include that
option as well.

#lang racket

(define-syntax-rule (for-keys/hash (k keys) expr)
  (for/hash ((k keys)) (values k expr)) )

(define (sum-hash-of-vectors1 table keys size)
  (define v (make-vector size 0))
  (for ((k keys))
       (let ((v_k (hash-ref table k)))
         (for ((i size))
              (vector-set!
               v i
               (+ (vector-ref v i)
                  (vector-ref v_k i))))))
  v)

(define (sum-hash-of-vectors2 table keys size)
  (define v (make-vector size 0))
  (for ((i size))
       (vector-set!
        v i
        (for/sum ((k keys))
                 (vector-ref (hash-ref table k) i))))
  v)

(define (sum-hash-of-vectors2.1 table keys size)
  (for/vector
   #:length size ((i size))
   (for/sum ((k keys))
            (vector-ref (hash-ref table k) i))))

(define (sum-hash-of-vectors3 table keys size)
  (apply vector-map +
         (for/list ((k keys))
                   (hash-ref table k))))

(define (sum-vector-of-tables1 vector size keys)
  (define v (make-vector size 0))
  (for* ((k keys) (i size))
        (vector-set!
         v i
         (+ (vector-ref v i)
            (hash-ref (vector-ref vector i) k))))
  v)

(define (sum-vector-of-tables2.1 vector size keys)
  (for/vector
   #:length size ((i size))
   (define table (vector-ref vector i))
   (for/sum ((k keys))
            (hash-ref table k))))

(define keys
  '(a b c d e f g h i j k l m n o p))

(define size 113)

(define table-of-vectors
  (for-keys/hash
   (k keys)
   (build-vector size (lambda (i) (random 1000)))))

(define vector-of-tables
  (for/vector
   #:length size ((s size))
   (for-keys/hash
    (k keys)
    (vector-ref (hash-ref table-of-vectors k) s))))

(displayln table-of-vectors)
(displayln vector-of-tables)

(define iterations# 10000)

(let ((table table-of-vectors))
(time
 (for ((i iterations#))
      (let ((sum (sum-hash-of-vectors1 table keys size)))
        (when (zero? i) (displayln sum)))))

(time
 (for ((i iterations#))
      (let ((sum (sum-hash-of-vectors2 table keys size)))
        (when (zero? i) (displayln sum)))))

(time
 (for ((i iterations#))
      (let ((sum (sum-hash-of-vectors2.1 table keys size)))
        (when (zero? i) (displayln sum)))))

(time
 (for ((i iterations#))
      (let ((sum (sum-hash-of-vectors3 table keys size)))
        (when (zero? i) (displayln sum)))))
)
          
(let ((vector vector-of-tables))
(time
 (for ((i iterations#))
      (let ((sum (sum-vector-of-tables1 vector size keys)))
        (when (zero? i) (displayln sum)))))

(time
 (for ((i iterations#))
      (let ((sum (sum-vector-of-tables2.1 vector size keys)))
        (when (zero? i) (displayln sum)))))
)

Attachment: signature.asc
Description: OpenPGP digital signature

____________________
  Racket Users list:
  http://lists.racket-lang.org/users

Reply via email to