Author: Richard Plangger <planri...@gmail.com>
Branch: new-jit-log
Changeset: r83846:c55eaccdb3a6
Date: 2016-04-21 13:56 +0200
http://bitbucket.org/pypy/pypy/changeset/c55eaccdb3a6/

Log:    moved .../metainterp/jitlog.py to rlib (since the vm implementer
        uses the jitlog directly) implemented compression (common prefix)
        for strings that are encoded in the debug merge point

diff --git a/rpython/jit/metainterp/warmstate.py 
b/rpython/jit/metainterp/warmstate.py
--- a/rpython/jit/metainterp/warmstate.py
+++ b/rpython/jit/metainterp/warmstate.py
@@ -6,7 +6,7 @@
 from rpython.rlib.debug import debug_start, debug_stop, debug_print
 from rpython.rlib.debug import have_debug_prints_for
 from rpython.rlib.jit import PARAMETERS
-from rpython.rlib import jitlog
+from rpython.rlib import jitlog as jl
 from rpython.rlib.nonconst import NonConstant
 from rpython.rlib.objectmodel import specialize, we_are_translated, r_dict
 from rpython.rlib.rarithmetic import intmask, r_uint
@@ -684,24 +684,25 @@
         if get_location_ptr is not None:
             types = self.jitdriver_sd._get_loc_types
             unwrap_greenkey = self.make_unwrap_greenkey()
-            unrolled_types = unrolling_iterable(enumerate(types))
+            unrolled_types = unrolling_iterable(types)
             def get_location(greenkey):
                 greenargs = unwrap_greenkey(greenkey)
                 fn = support.maybe_on_top_of_llinterp(rtyper, get_location_ptr)
-                tuple_ptr = fn(*greenargs)
-                #
-                flag = intmask(tuple_ptr.item0)
-                value_tuple = tuple_ptr.item1
-                ntuple = ()
-                for i,(_,t) in unrolled_types:
-                    if t == "s":
-                        ntuple += (hlstr(getattr(value_tuple, 'item' + 
str(i))),)
-                    elif t == "i":
-                        ntuple += (intmask(getattr(value_tuple, 'item' + 
str(i))),)
+                value_tuple = fn(*greenargs)
+                values = []
+                i = 0
+                for sem_type,gen_type in unrolled_types:
+                    if gen_type == "s":
+                        value = getattr(value_tuple, 'item' + str(i))
+                        values.append(jl.wrap(sem_type,gen_type,hlstr(value)))
+                    elif gen_type == "i":
+                        value = getattr(value_tuple, 'item' + str(i))
+                        
values.append(jl.wrap(sem_type,gen_type,intmask(value)))
                     else:
                         raise NotImplementedError
-                return flag, ntuple
-            self.get_location_types = types
+                    i += 1
+                return values
+            self.get_location_types = list(types)
             self.get_location = get_location
         else:
             self.get_location_types = None
diff --git a/rpython/rlib/jitlog.py b/rpython/rlib/jitlog.py
--- a/rpython/rlib/jitlog.py
+++ b/rpython/rlib/jitlog.py
@@ -1,5 +1,75 @@
+import sys
+import weakref
+import struct
+import os
 
-# generic parameters
+from rpython.rlib.rvmprof import cintf
+from rpython.jit.metainterp import resoperation as resoperations
+from rpython.jit.metainterp.resoperation import rop
+from rpython.jit.metainterp.history import ConstInt, ConstFloat
+from rpython.rlib.objectmodel import we_are_translated
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
+from rpython.rlib.objectmodel import compute_unique_id, always_inline
+from rpython.rlib.objectmodel import we_are_translated, specialize
+from rpython.rlib.unroll import unrolling_iterable
+
+def commonprefix(m):
+    "Given a list of pathnames, returns the longest common leading component"
+    if not m: return ''
+    s1 = min(m)
+    s2 = max(m)
+    for i, c in enumerate(s1):
+        if c != s2[i]:
+            return s1[:i]
+    return s1
+
+@always_inline
+def encode_str(string):
+    return encode_le_32bit(len(string)) + string
+
+@always_inline
+def encode_le_16bit(val):
+    return chr((val >> 0) & 0xff) + chr((val >> 8) & 0xff)
+
+@always_inline
+def encode_le_32bit(val):
+    return ''.join([chr((val >> 0) & 0xff),
+                    chr((val >> 8) & 0xff),
+                    chr((val >> 16) & 0xff),
+                    chr((val >> 24) & 0xff)])
+
+@always_inline
+def encode_le_64bit(val):
+    return ''.join([chr((val >> 0) & 0xff),
+                    chr((val >> 8) & 0xff),
+                    chr((val >> 16) & 0xff),
+                    chr((val >> 24) & 0xff),
+                    chr((val >> 32) & 0xff),
+                    chr((val >> 40) & 0xff),
+                    chr((val >> 48) & 0xff),
+                    chr((val >> 56)& 0xff)])
+
+@always_inline
+def encode_le_addr(val):
+    if IS_32_BIT:
+        return encode_le_32bit(val)
+    else:
+        return encode_le_64bit(val)
+
+def encode_type(type, value):
+    if type == "s":
+        return encode_str(value)
+    elif type == "q":
+        return encode_le_64bit(value)
+    elif type == "i":
+        return encode_le_32bit(value)
+    elif type == "h":
+        return encode_le_32bit(value)
+    else:
+        raise NotImplementedError
+
+
+# more variable parameters
 MP_STR = (0x0, "s")
 MP_INT = (0x0, "i")
 
@@ -7,11 +77,62 @@
 MP_FILENAME = (0x1, "s")
 MP_LINENO = (0x2, "i")
 MP_INDEX = (0x4, "i")
-MP_ENCLOSING = (0x8, "s")
+MP_SCOPE = (0x8, "s")
 MP_OPCODE = (0x10, "s")
 
-def get_type(flag):
-    pass
+class WrappedValue(object):
+    def encode(self, i, prefixes):
+        raise NotImplementedError
+
+class StringValue(WrappedValue):
+    def __init__(self, sem_type, gen_type, value):
+        self.value = value
+
+    def encode(self, log, i, prefixes):
+        str_value = self.value
+        if len(str_value) < 5:
+            enc_value = encode_str(chr(0xff) + str_value)
+        else:
+            cp = commonprefix([prefixes[i], str_value])
+            if cp != prefixes[i]:
+                if len(cp) == 0:
+                    # they are fully different!
+                    prefixes[i] = str_value
+                    enc_value = encode_str(chr(0xff) + str_value)
+                else:
+                    # the prefix changed
+                    prefixes[i] = cp
+                    # common prefix of field i
+                    assert i != 0xff
+                    log._write_marked(MARK_COMMON_PREFIX, chr(i) \
+                                                      + encode_str(cp))
+                    enc_value = encode_str(chr(i) + str_value)
+            else:
+                enc_value = encode_str(chr(i) + str_value)
+        #
+        if prefixes[i] is None:
+            prefixes[i] = str_value
+        return enc_value
+
+class IntValue(WrappedValue):
+    def __init__(self, sem_type, gen_type, value):
+        self.value = value
+
+    def encode(self, log, i, prefixes):
+        return encode_le_64bit(self.value)
+
+# note that a ...
+# "semantic_type" is an integer denoting which meaning does a type at a merge 
point have
+#                 there are very common ones that are predefined. E.g. 
MP_FILENAME
+# "generic_type" is one of the primitive types supported (string,int)
+
+@specialize.argtype(2)
+def wrap(sem_type, gen_type, value):
+    if isinstance(value, int):
+        return IntValue(sem_type, gen_type, value)
+    elif isinstance(value, str):
+        return StringValue(sem_type, gen_type, value)
+    raise NotImplementedError
 
 def returns(*args):
     """ Decorate your get_location function to specify the types.
@@ -26,3 +147,305 @@
         method._loc_types = args
         return method
     return decor
+
+JITLOG_VERSION = 1
+JITLOG_VERSION_16BIT_LE = struct.pack("<H", JITLOG_VERSION)
+
+MARK_INPUT_ARGS = 0x10
+MARK_RESOP_META = 0x11
+MARK_RESOP = 0x12
+MARK_RESOP_DESCR = 0x13
+MARK_ASM_ADDR = 0x14
+MARK_ASM = 0x15
+
+# which type of trace is logged after this
+# the trace as it is recorded by the tracer
+MARK_TRACE = 0x16
+# the trace that has passed the optimizer
+MARK_TRACE_OPT = 0x17
+# the trace assembled to machine code (after rewritten)
+MARK_TRACE_ASM = 0x18
+
+# the machine code was patched (e.g. guard)
+MARK_STITCH_BRIDGE = 0x19
+
+MARK_JITLOG_COUNTER = 0x20
+MARK_START_TRACE = 0x21
+MARK_INIT_MERGE_POINT = 0x22
+
+MARK_JITLOG_HEADER = 0x23
+MARK_JITLOG_DEBUG_MERGE_POINT = 0x24
+MARK_COMMON_PREFIX = 0x25
+
+IS_32_BIT = sys.maxint == 2**31-1
+
+def assemble_header():
+    version = JITLOG_VERSION_16BIT_LE
+    count = len(resoperations.opname)
+    content = [version, chr(MARK_RESOP_META),
+               encode_le_16bit(count)]
+    for opnum, opname in resoperations.opname.items():
+        content.append(encode_le_16bit(opnum))
+        content.append(encode_str(opname.lower()))
+    return ''.join(content)
+
+class VMProfJitLogger(object):
+    def __init__(self):
+        self.cintf = cintf.setup()
+        self.memo = {}
+        self.trace_id = 0
+
+    def setup_once(self):
+        if self.cintf.jitlog_enabled():
+            return
+        self.cintf.jitlog_try_init_using_env()
+        if not self.cintf.jitlog_enabled():
+            return
+        blob = assemble_header()
+        self.cintf.jitlog_write_marked(MARK_JITLOG_HEADER, blob, len(blob))
+
+    def finish(self):
+        self.cintf.jitlog_teardown()
+
+    def start_new_trace(self, faildescr=None, entry_bridge=False):
+        if not self.cintf.jitlog_enabled():
+            return
+        content = [encode_le_addr(self.trace_id)]
+        if faildescr:
+            content.append(encode_str('bridge'))
+            descrnmr = compute_unique_id(faildescr)
+            content.append(encode_le_addr(descrnmr))
+        else:
+            content.append(encode_str('loop'))
+            content.append(encode_le_addr(int(entry_bridge)))
+        self._write_marked(MARK_START_TRACE, ''.join(content))
+        self.trace_id += 1
+
+    def _write_marked(self, mark, line):
+        if not we_are_translated():
+            assert self.cintf.jitlog_enabled()
+        self.cintf.jitlog_write_marked(mark, line, len(line))
+
+    def log_jit_counter(self, struct):
+        if not self.cintf.jitlog_enabled():
+            return
+        le_addr = encode_le_addr(struct.number)
+        # not an address (but a number) but it is a machine word
+        le_count = encode_le_addr(struct.i)
+        self._write_marked(MARK_JITLOG_COUNTER, le_addr + le_count)
+
+    def log_trace(self, tag, metainterp_sd, mc, memo=None):
+        if not self.cintf.jitlog_enabled():
+            return EMPTY_TRACE_LOG
+        assert isinstance(tag, int)
+        if memo is None:
+            memo = {}
+        return LogTrace(tag, memo, metainterp_sd, mc, self)
+
+    def log_patch_guard(self, descr_number, addr):
+        if not self.cintf.jitlog_enabled():
+            return
+        le_descr_number = encode_le_addr(descr_number)
+        le_addr = encode_le_addr(addr)
+        lst = [le_descr_number, le_addr]
+        self._write_marked(MARK_STITCH_BRIDGE, ''.join(lst))
+
+class BaseLogTrace(object):
+    def write_trace(self, trace):
+        return None
+
+    def write(self, args, ops, ops_offset={}):
+        return None
+
+EMPTY_TRACE_LOG = BaseLogTrace()
+
+def encode_merge_point(log, prefixes, values):
+    line = []
+    unrolled = unrolling_iterable(values)
+    i = 0
+    for value in unrolled:
+        cp = value.encode_common_prefix(i, prefixes)
+        if cp is not None:
+            log._write_marked(MARK_COMMON_PREFIX, cp)
+        line.append(value.encode(log,i,prefixes))
+        i += 1
+    return ''.join(line)
+
+
+class LogTrace(BaseLogTrace):
+    def __init__(self, tag, memo, metainterp_sd, mc, logger):
+        self.memo = memo
+        self.metainterp_sd = metainterp_sd
+        self.ts = None
+        if self.metainterp_sd is not None:
+            self.ts = metainterp_sd.cpu.ts
+        self.tag = tag
+        self.mc = mc
+        self.logger = logger
+        self.common_prefix = None
+
+    def write_trace(self, trace):
+        ops = []
+        i = trace.get_iter()
+        while not i.done():
+            ops.append(i.next())
+        self.write(i.inputargs, ops)
+
+    def write(self, args, ops, ops_offset={}):
+        log = self.logger
+        log._write_marked(self.tag, encode_le_addr(self.logger.trace_id))
+
+        # input args
+        str_args = [self.var_to_str(arg) for arg in args]
+        string = encode_str(','.join(str_args))
+        log._write_marked(MARK_INPUT_ARGS, string)
+
+        # assembler address (to not duplicate it in write_code_dump)
+        if self.mc is not None:
+            absaddr = self.mc.absolute_addr()
+            rel = self.mc.get_relative_pos()
+            # packs <start addr> <end addr> as two unsigend longs
+            le_addr1 = encode_le_addr(absaddr)
+            le_addr2 = encode_le_addr(absaddr + rel)
+            log._write_marked(MARK_ASM_ADDR, le_addr1 + le_addr2)
+        for i,op in enumerate(ops):
+            if rop.DEBUG_MERGE_POINT == op.getopnum():
+                self.encode_debug_info(op)
+                continue
+            mark, line = self.encode_op(op)
+            log._write_marked(mark, line)
+            self.write_core_dump(ops, i, op, ops_offset)
+
+        self.memo = {}
+
+    def encode_once(self):
+        pass
+
+    def encode_debug_info(self, op):
+        # the idea is to write the debug merge point as it's own well known
+        # tag. Compression for common prefixes is implemented:
+
+        log = self.logger
+        jd_sd = self.metainterp_sd.jitdrivers_sd[op.getarg(0).getint()]
+        if not jd_sd.warmstate.get_location:
+            return
+        values = jd_sd.warmstate.get_location(op.getarglist()[3:])
+        if values is None:
+            # indicates that this function is not provided to the jit driver
+            return
+        types = jd_sd.warmstate.get_location_types
+
+        if self.common_prefix is None:
+            # first time visiting a merge point
+            # setup the common prefix
+            self.common_prefix = [""] * len(types)
+            encoded_types = []
+            for i, (semantic_type, _) in enumerate(types):
+                encoded_types.append(chr(semantic_type))
+            log._write_marked(MARK_INIT_MERGE_POINT, ''.join(encoded_types))
+
+        # the types have already been written
+        encoded = encode_merge_point(log, self.common_prefix, values)
+        log._write_marked(MARK_JITLOG_DEBUG_MERGE_POINT, encoded)
+
+    def encode_op(self, op):
+        """ an operation is written as follows:
+            <marker> <opid (16 bit)> \
+                     <len (32 bit)> \
+                     <res_val>,<arg_0>,...,<arg_n>,<descr>
+            The marker indicates if the last argument is
+            a descr or a normal argument.
+        """
+        str_args = [self.var_to_str(arg) for arg in op.getarglist()]
+        descr = op.getdescr()
+        le_opnum = encode_le_16bit(op.getopnum())
+        str_res = self.var_to_str(op)
+        line = ','.join([str_res] + str_args)
+        if descr:
+            descr_str = descr.repr_of_descr()
+            line = line + ',' + descr_str
+            string = encode_str(line)
+            descr_number = compute_unique_id(descr)
+            le_descr_number = encode_le_addr(descr_number)
+            return MARK_RESOP_DESCR, le_opnum + string + le_descr_number
+        else:
+            string = encode_str(line)
+            return MARK_RESOP, le_opnum + string
+
+
+    def write_core_dump(self, operations, i, op, ops_offset):
+        if self.mc is None:
+            return
+
+        op2 = None
+        j = i+1
+        # find the next op that is in the offset hash
+        while j < len(operations):
+            op2 = operations[j]
+            if op in ops_offset:
+                break
+            j += 1
+
+        # this op has no known offset in the machine code (it might be
+        # a debug operation)
+        if op not in ops_offset:
+            return
+        # there is no well defined boundary for the end of the
+        # next op in the assembler
+        if op2 is not None and op2 not in ops_offset:
+            return
+        dump = []
+
+        start_offset = ops_offset[op]
+        assert start_offset >= 0
+        # end offset is either the last pos in the assembler
+        # or the offset of op2
+        if op2 is None:
+            end_offset = self.mc.get_relative_pos()
+        else:
+            end_offset = ops_offset[op2]
+
+        count = end_offset - start_offset
+        dump = self.mc.copy_core_dump(self.mc.absolute_addr(), start_offset, 
count)
+        offset = encode_le_16bit(start_offset)
+        edump = encode_str(dump)
+        self.logger._write_marked(MARK_ASM, offset + edump)
+
+    def var_to_str(self, arg):
+        try:
+            mv = self.memo[arg]
+        except KeyError:
+            mv = len(self.memo)
+            self.memo[arg] = mv
+        if isinstance(arg, ConstInt):
+            if self.metainterp_sd and int_could_be_an_address(arg.value):
+                addr = arg.getaddr()
+                name = self.metainterp_sd.get_name_from_address(addr)
+                if name:
+                    return 'ConstClass(' + name + ')'
+            return str(arg.value)
+        elif self.ts is not None and isinstance(arg, self.ts.ConstRef):
+            if arg.value:
+                return 'ConstPtr(ptr' + str(mv) + ')'
+            return 'ConstPtr(null)'
+        if isinstance(arg, ConstFloat):
+            return str(arg.getfloat())
+        elif arg is None:
+            return 'None'
+        elif arg.is_vector():
+            return 'v' + str(mv)
+        elif arg.type == 'i':
+            return 'i' + str(mv)
+        elif arg.type == 'r':
+            return 'p' + str(mv)
+        elif arg.type == 'f':
+            return 'f' + str(mv)
+        else:
+            return '?'
+
+def int_could_be_an_address(x):
+    if we_are_translated():
+        x = rffi.cast(lltype.Signed, x)       # force it
+        return not (-32768 <= x <= 32767)
+    else:
+        return isinstance(x, llmemory.AddressAsInt)
diff --git a/rpython/rlib/test/test_jitlog.py b/rpython/rlib/test/test_jitlog.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/test/test_jitlog.py
@@ -0,0 +1,56 @@
+from rpython.jit.tool.oparser import pure_parse
+from rpython.jit.metainterp.optimizeopt.util import equaloplists
+from rpython.jit.metainterp.resoperation import ResOperation, rop
+from rpython.jit.backend.model import AbstractCPU
+from rpython.jit.metainterp.history import ConstInt, ConstPtr
+from rpython.rlib.jitlog import (encode_str, encode_le_16bit, encode_le_64bit)
+from rpython.rlib import jitlog as jl
+import tempfile
+
+class TestLogger(object):
+
+    def make_metainterp_sd(self):
+        class FakeJitDriver(object):
+            class warmstate(object):
+                @staticmethod
+                def get_location(greenkey_list):
+                    assert len(greenkey_list) == 0
+                    return '/home/pypy/jit.py', 0, 'enclosed', 99, 'DEL'
+                get_location_types = 
[(jl.MP_FILENAME,'s'),(0x0,'i'),(jl.MP_SCOPE,'s'), (0x0,'i'), (jl.MP_OPCODE, 
's')]
+
+        class FakeMetaInterpSd:
+            cpu = AbstractCPU()
+            cpu.ts = None
+            jitdrivers_sd = [FakeJitDriver()]
+            def get_name_from_address(self, addr):
+                return 'Name'
+        return FakeMetaInterpSd()
+
+    def test_debug_merge_point(self, tmpdir):
+        logger = jitlog.VMProfJitLogger()
+        file = tmpdir.join('binary_file')
+        file.ensure()
+        fd = file.open('wb')
+        logger.cintf.jitlog_init(fd.fileno())
+        log_trace = logger.log_trace(0, self.make_metainterp_sd(), None)
+        op = ResOperation(rop.DEBUG_MERGE_POINT, [ConstInt(0), ConstInt(0), 
ConstInt(0)])
+        log_trace.write([], [op])
+        #the next line will close 'fd'
+        fd.close()
+        logger.finish()
+        binary = file.read()
+        assert binary.startswith(b'\x00\x04\x00\x00\x00loop')
+        assert binary.endswith(b'\x24' + \
+                               encode_str('/home/pypy/jit.py') + \
+                               encode_le_16bit(0) + \
+                               encode_str('enclosed') + \
+                               encode_le_64bit(99) + \
+                               encode_str('DEL'))
+
+    class FakeLog(object):
+        def _write_marked(self, id, text):
+            pass
+
+    def test_common_prefix(self):
+        fakelog = FakeLog()
+        logger = jitlog.LogTrace(0x0, {}, None, None, fakelog)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to