> Dear Kan, > > On Wed, Jan 3, 2018 at 9:20 PM, Liang, Kan <kan.li...@intel.com> wrote: > > Hi Stephane and Andi, > > > > Could you please review the script? > > > > If it's OK for you, could you please Ack/Review this? > > > > Thanks, > > Kan > > > >> > >> From: Kan Liang <kan.li...@intel.com> > >> > >> There could be different types of memory in the system. E.g normal > >> System Memory, Persistent Memory. To understand how the workload > maps > >> to > >> those memories, it's important to know the I/O statistics of them. > >> Perf can collect physical addresses, but those are raw data. > >> It still needs extra work to resolve the physical addresses. > >> Provide a script to facilitate the physical addresses resolving and > >> I/O statistics. > >> > >> Profile with MEM_INST_RETIRED.ALL_LOADS or > >> MEM_UOPS_RETIRED.ALL_LOADS > >> event if any of them is available. > >> Look up the /proc/iomem and resolve the physical address. > >> Provide memory type summary. > >> > >> Here is an example output. > >> #perf script report mem-phys-addr > >> Event: mem_inst_retired.all_loads:P > >> Memory type count percentage > >> ---------------------------------------- ----------- ----------- > >> System RAM 74 53.2% > >> Persistent Memory 55 39.6% > >> N/A 10 7.2% > >> > >> Signed-off-by: Kan Liang <kan.li...@intel.com> > >> --- > >> > >> Changes since V1: > >> - Do not mix DLA and Load Latency. Do not compare the loads and stores. > >> Only profile the loads. 
> >> - Use event name to replace the RAW event > >> > >> tools/perf/scripts/python/bin/mem-phys-addr-record | 19 +++++ > >> tools/perf/scripts/python/bin/mem-phys-addr-report | 3 + > >> tools/perf/scripts/python/mem-phys-addr.py | 97 > >> ++++++++++++++++++++++ > >> .../util/scripting-engines/trace-event-python.c | 2 + > >> 4 files changed, 121 insertions(+) > >> create mode 100644 tools/perf/scripts/python/bin/mem-phys-addr- > record > >> create mode 100644 tools/perf/scripts/python/bin/mem-phys-addr- > report > >> create mode 100644 tools/perf/scripts/python/mem-phys-addr.py > >> > >> diff --git a/tools/perf/scripts/python/bin/mem-phys-addr-record > >> b/tools/perf/scripts/python/bin/mem-phys-addr-record > >> new file mode 100644 > >> index 0000000..5a87512 > >> --- /dev/null > >> +++ b/tools/perf/scripts/python/bin/mem-phys-addr-record > >> @@ -0,0 +1,19 @@ > >> +#!/bin/bash > >> + > >> +# > >> +# Profiling physical memory by all retired load instructions/uops event > >> +# MEM_INST_RETIRED.ALL_LOADS or MEM_UOPS_RETIRED.ALL_LOADS > >> +# > >> + > >> +load=`perf list | grep mem_inst_retired.all_loads` > >> +if [ -z "$load" ]; then > >> + load=`perf list | grep mem_uops_retired.all_loads` > >> +fi > >> +if [ -z "$load" ]; then > >> + echo "There is no event to count all retired load instructions/uops." 
> >> + exit 1 > >> +fi > >> + > >> +arg=$(echo $load | tr -d ' ') > >> +arg="$arg:P" > >> +perf record --phys-data -e $arg $@ > >> diff --git a/tools/perf/scripts/python/bin/mem-phys-addr-report > >> b/tools/perf/scripts/python/bin/mem-phys-addr-report > >> new file mode 100644 > >> index 0000000..3f2b847 > >> --- /dev/null > >> +++ b/tools/perf/scripts/python/bin/mem-phys-addr-report > >> @@ -0,0 +1,3 @@ > >> +#!/bin/bash > >> +# description: resolve physical address samples > >> +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/mem-phys- > addr.py > >> diff --git a/tools/perf/scripts/python/mem-phys-addr.py > >> b/tools/perf/scripts/python/mem-phys-addr.py > >> new file mode 100644 > >> index 0000000..1d1f757 > >> --- /dev/null > >> +++ b/tools/perf/scripts/python/mem-phys-addr.py > >> @@ -0,0 +1,97 @@ > >> +# mem-phys-addr.py: Resolve physical address samples > >> +# Copyright (c) 2017, Intel Corporation. > >> +# > >> +# This program is free software; you can redistribute it and/or modify it > >> +# under the terms and conditions of the GNU General Public License, > >> +# version 2, as published by the Free Software Foundation. > >> +# > >> +# This program is distributed in the hope it will be useful, but WITHOUT > >> +# ANY WARRANTY; without even the implied warranty of > >> MERCHANTABILITY or > >> +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public > License > >> for > >> +# more details. > > May be you could consider using an SPDX tag instead of this fine but > long legalese? > See Thomas doc patches [1] for details > > [1] https://lkml.org/lkml/2017/12/28/323 >
Thanks Philippe. I will update the license in V3. > >> + > >> +from __future__ import division > >> +import os > >> +import sys > >> +import struct > >> +import re > >> +import bisect > >> +import collections > >> + > >> +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ > >> + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') > >> + > >> +system_ram = [] > >> +pmem = [] > >> +f = None > >> +load_mem_type_cnt = collections.Counter() > >> +event_name = None > > nit. may be it does not matter too much.... but using globals > throughout as opposed to return values makes the script harder to read > IMHO. > The Python script is embedded into C code and implements the standard interfaces. I think globals are a good way to share state between functions and across calls. Otherwise, the interfaces would have to change, which would impact other existing scripts. Thanks, Kan > >> + > >> +def parse_iomem(): > >> + global f > >> + f = open('/proc/iomem', 'r') > >> + for i, j in enumerate(f): > >> + m = re.split('-|:',j,2) > >> + if m[2].strip() == 'System RAM': > >> + system_ram.append(long(m[0], 16)) > >> + system_ram.append(long(m[1], 16)) > >> + if m[2].strip() == 'Persistent Memory': > >> + pmem.append(long(m[0], 16)) > >> + pmem.append(long(m[1], 16)) > >> + > >> +def print_memory_type(): > >> + print "Event: %s" % (event_name) > >> + print "%-40s %10s %10s\n" % ("Memory type", "count", > >> "percentage"), > >> + print "%-40s %10s %10s\n" % > >> ("----------------------------------------", \ > >> + "-----------", "-----------"), > >> + total = sum(load_mem_type_cnt.values()) > >> + for mem_type, count in sorted(load_mem_type_cnt.most_common(), > >> \ > >> + key = lambda(k, v): (v, k), reverse = > >> True): > >> + print "%-40s %10d %10.1f%%\n" % (mem_type, count, 100 * > >> count / total), > >> + > >> +def trace_begin(): > >> + parse_iomem() > >> + > >> +def trace_end(): > >> + print_memory_type() > >> + f.close() > >> + > >> +def is_system_ram(phys_addr): > >> + 
#/proc/iomem is sorted > >> + position = bisect.bisect(system_ram, phys_addr) > >> + if position % 2 == 0: > >> + return False > >> + return True > >> + > >> +def is_persistent_mem(phys_addr): > >> + position = bisect.bisect(pmem, phys_addr) > >> + if position % 2 == 0: > >> + return False > >> + return True > >> + > >> +def find_memory_type(phys_addr): > >> + if phys_addr == 0: > >> + return "N/A" > >> + if is_system_ram(phys_addr): > >> + return "System RAM" > >> + > >> + if is_persistent_mem(phys_addr): > >> + return "Persistent Memory" > >> + > >> + #slow path, search all > >> + f.seek(0, 0) > >> + for j in f: > >> + m = re.split('-|:',j,2) > >> + if long(m[0], 16) <= phys_addr <= long(m[1], 16): > >> + return m[2] > >> + return "N/A" > >> + > >> +def process_event(param_dict): > >> + name = param_dict["ev_name"] > >> + sample = param_dict["sample"] > >> + phys_addr = sample["phys_addr"] > >> + > >> + global event_name > >> + if event_name == None: > >> + event_name = name > >> + load_mem_type_cnt[find_memory_type(phys_addr)] += 1 > >> diff --git a/tools/perf/util/scripting-engines/trace-event-python.c > >> b/tools/perf/util/scripting-engines/trace-event-python.c > >> index c7187f0..8cd6317 100644 > >> --- a/tools/perf/util/scripting-engines/trace-event-python.c > >> +++ b/tools/perf/util/scripting-engines/trace-event-python.c > >> @@ -500,6 +500,8 @@ static PyObject *get_perf_sample_dict(struct > >> perf_sample *sample, > >> PyLong_FromUnsignedLongLong(sample->time)); > >> pydict_set_item_string_decref(dict_sample, "period", > >> PyLong_FromUnsignedLongLong(sample->period)); > >> + pydict_set_item_string_decref(dict_sample, "phys_addr", > >> + PyLong_FromUnsignedLongLong(sample- > >> >phys_addr)); > >> set_sample_read_in_dict(dict_sample, sample, evsel); > >> pydict_set_item_string_decref(dict, "sample", dict_sample); > >> > >> -- > >> 2.7.4 > > > > > -- > Cordially > Philippe Ombredanne