#!/usr/bin/python
# -*- coding: latin-1 -*-
#
# Show difference between two Valgrind logs. 
#
# Author: Kimmo Hämäläinen <kimmo.hamalainen@nokia.com>
#
# The first log's blocks are subtracted from the second log's blocks and the
# difference is printed out. Negative number of blocks means blocks that
# are present in the first log but missing from the second log.
#
# NOTE: Valgrind sometimes has backtraces with varying block sizes, such as
# "104 bytes in 3 blocks...". These cause non-integer block sizes and the script
# probably fails to correctly subtract them. Naturally the script can also fail
# if the division produces an integer block size by accident.
#
# This is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 2.1 as published by the Free Software Foundation.

import io, re, sys, string

if len(sys.argv) != 3:
  print 'Usage: %s <log1> <log2>' % sys.argv[0]
  sys.exit(1)

f1 = io.open(sys.argv[1])
f2 = io.open(sys.argv[2])
blocks1 = dict()
blocks2 = dict()
start_re = re.compile('==[0-9]+== ([0-9,]+) bytes in ([0-9,]+) blocks are '
                      '((still reachable)|(possibly lost)|(definitely lost))')
mid_re = re.compile('==[0-9]+==    ((by)|(at)) 0x[0-9A-Z]+: (.+\n)')
end_re = re.compile('==[0-9]+== \n')

def get_blocks(f, blocks):
  # blocks is a dict of dicts with backtrace as the key.
  # The value dict maps block sizes to the number of those blocks.
  global start_re, mid_re, end_re
  backtrace = ''
  blocksz = nblocks = 0
  for l in f.readlines():
    m = start_re.match(l)
    if m:
      bytes = float(string.replace(m.group(1), ',', ''))
      nblocks = int(string.replace(m.group(2), ',', ''))
      blocksz = bytes / nblocks
      backtrace = '%s at\n' % m.group(3)
    elif nblocks > 0 and end_re.match(l):
      if backtrace not in blocks:
        blocks[backtrace] = {blocksz: nblocks}
      elif blocksz in blocks[backtrace]:
        blocks[backtrace][blocksz] += nblocks
      else:
        blocks[backtrace][blocksz] = nblocks
      backtrace = ''
      blocksz = nblocks = 0
    elif nblocks > 0:
      backtrace += mid_re.match(l).group(4)

get_blocks(f1, blocks1)
get_blocks(f2, blocks2)
diff = dict()

# remove blocks of blocks1 from blocks2; negative number for blocks that are
# missing from blocks2
for backtrace in blocks1:
  if backtrace in blocks2:
    for blocksz in blocks1[backtrace]:
      if backtrace not in diff:
        diff[backtrace] = dict()
      if blocksz in blocks2[backtrace]:
        diff[backtrace][blocksz] = blocks2[backtrace][blocksz] \
                                   - blocks1[backtrace][blocksz]
      else:
        diff[backtrace][blocksz] = -blocks1[backtrace][blocksz]
    # block sizes that are in blocks2[backtrace] but not in blocks1[backtrace]
    for blocksz in blocks2[backtrace]:
      if blocksz not in blocks1[backtrace]:
        diff[backtrace][blocksz] = blocks2[backtrace][blocksz]
  else:
    diff[backtrace] = dict()
    for blocksz in blocks1[backtrace]:
      diff[backtrace][blocksz] = -blocks1[backtrace][blocksz]

# add backtraces that are in blocks2 but not in blocks1
for backtrace in blocks2:
  if backtrace not in blocks1:
    diff[backtrace] = blocks2[backtrace] 

copy = dict()
# remove block sizes that have zero blocks and blockless backtraces
for backtrace in diff:
  for blocksz in diff[backtrace]:
    if diff[backtrace][blocksz] != 0:
      if backtrace not in copy:
        copy[backtrace] = dict()
      copy[backtrace][blocksz] = diff[backtrace][blocksz]
diff = copy

def key_func(backtrace):
  global diff
  total_bytes = 0
  for blocksz in diff[backtrace]:
    total_bytes += blocksz * abs(diff[backtrace][blocksz])
  return total_bytes

# sort the backtraces and print them
l = diff.keys()
l.sort(key = key_func)
for backtrace in l:
  for blocksz in diff[backtrace]:
    print diff[backtrace][blocksz], 'blocks of', blocksz, 'bytes (%s bytes)' % \
          abs(diff[backtrace][blocksz] * blocksz)
  print backtrace

def count_bytes(d):
  total_bytes = 0
  for backtrace in d:
    for blocksz in d[backtrace]:
      total_bytes += blocksz * d[backtrace][blocksz]
  return total_bytes

print '%d backtraces (%d bytes in total) in %s' % (len(blocks1),
                                                   count_bytes(blocks1),
                                                   sys.argv[1])
print '%d backtraces (%d bytes in total) in %s' % (len(blocks2),
                                                   count_bytes(blocks2),
                                                   sys.argv[2])
print '%d backtraces (%d bytes in total) in %s - %s' % (len(diff),
                                                        count_bytes(diff),
                                                        sys.argv[2],
                                                        sys.argv[1])

