On Thu, Jun 25, 2020 at 11:45 AM Aleksandar Markovic <aleksandar.qemu.de...@gmail.com> wrote: > > сре, 24. јун 2020. у 17:32 Ahmed Karaman > <ahmedkhaledkara...@gmail.com> је написао/ла: > > > > Syntax: > > topN_perf.py [-h] [-n] <number of displayed top functions> -- \ > > <qemu executable> [<qemu executable options>] \ > > <target executable> [<target execurable options>] > > > > [-h] - Print the script arguments help message. > > [-n] - Specify the number of top functions to print. > > - If this flag is not specified, the tool defaults to 25. > > > > Example of usage: > > topN_perf.py -n 20 -- qemu-arm coulomb_double-arm > > > > Example Output: > > No. Percentage Name Caller > > ---- ---------- ------------------------- ------------------------- > > 1 16.25% float64_mul qemu-x86_64 > > 2 12.01% float64_sub qemu-x86_64 > > 3 11.99% float64_add qemu-x86_64 > > 4 5.69% helper_mulsd qemu-x86_64 > > 5 4.68% helper_addsd qemu-x86_64 > > 6 4.43% helper_lookup_tb_ptr qemu-x86_64 > > 7 4.28% helper_subsd qemu-x86_64 > > 8 2.71% f64_compare qemu-x86_64 > > 9 2.71% helper_ucomisd qemu-x86_64 > > 10 1.04% helper_pand_xmm qemu-x86_64 > > 11 0.71% float64_div qemu-x86_64 > > 12 0.63% helper_pxor_xmm qemu-x86_64 > > 13 0.50% 0x00007f7b7004ef95 [JIT] tid 491 > > 14 0.50% 0x00007f7b70044e83 [JIT] tid 491 > > 15 0.36% helper_por_xmm qemu-x86_64 > > 16 0.32% helper_cc_compute_all qemu-x86_64 > > 17 0.30% 0x00007f7b700433f0 [JIT] tid 491 > > 18 0.30% float64_compare_quiet qemu-x86_64 > > 19 0.27% soft_f64_addsub qemu-x86_64 > > 20 0.26% round_to_int qemu-x86_64 > > > > Signed-off-by: Ahmed Karaman <ahmedkhaledkara...@gmail.com> > > --- > > scripts/performance/topN_perf.py | 142 +++++++++++++++++++++++++++++++ > > 1 file changed, 142 insertions(+) > > create mode 100755 scripts/performance/topN_perf.py > > > > diff --git a/scripts/performance/topN_perf.py > > b/scripts/performance/topN_perf.py > > new file mode 100755 > > index 0000000000..d2b939c375 > > --- /dev/null > > +++ 
b/scripts/performance/topN_perf.py > > @@ -0,0 +1,142 @@ > > +#!/usr/bin/env python3 > > + > > +# Print the top N most executed functions in QEMU using perf. > > +# Syntax: > > +# topN_perf.py [-h] [-n] <number of displayed top functions> -- \ > > +# <qemu executable> [<qemu executable options>] \ > > +# <target executable> [<target execurable options>] > > +# > > +# [-h] - Print the script arguments help message. > > +# [-n] - Specify the number of top functions to print. > > +# - If this flag is not specified, the tool defaults to 25. > > +# > > +# Example of usage: > > +# topN_perf.py -n 20 -- qemu-arm coulomb_double-arm > > +# > > +# This file is a part of the project "TCG Continuous Benchmarking". > > +# > > +# Copyright (C) 2020 Ahmed Karaman <ahmedkhaledkara...@gmail.com> > > +# Copyright (C) 2020 Aleksandar Markovic > > <aleksandar.qemu.de...@gmail.com> > > +# > > +# This program is free software: you can redistribute it and/or modify > > +# it under the terms of the GNU General Public License as published by > > +# the Free Software Foundation, either version 2 of the License, or > > +# (at your option) any later version. > > +# > > +# This program is distributed in the hope that it will be useful, > > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > +# GNU General Public License for more details. > > +# > > +# You should have received a copy of the GNU General Public License > > +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
> > + > > +import argparse > > +import os > > +import subprocess > > +import sys > > + > > + > > +# Parse the command line arguments > > +parser = argparse.ArgumentParser( > > + usage='topN_perf.py [-h] [-n] <number of displayed top functions > -- > > ' > > + '<qemu executable> [<qemu executable options>] ' > > + '<target executable> [<target executable options>]') > > + > > +parser.add_argument('-n', dest='top', type=int, default=25, > > + help='Specify the number of top functions to print.') > > + > > +parser.add_argument('command', type=str, nargs='+', help=argparse.SUPPRESS) > > + > > +args = parser.parse_args() > > + > > +# Extract the needed variables from the args > > +command = args.command > > +top = args.top > > + > > +# Insure that perf is installed > > +check_perf = subprocess.run(["which", "perf"], stdout=subprocess.DEVNULL) > > +if check_perf.returncode: > > + sys.exit("Please install perf before running the script!") > > I would rename "check_perf" to "check_perf_presence". It is more > specific and clearer. > > > + > > +# Insure user has previllage to run perf > > +check_perf_executability = subprocess.run(["perf", "stat", "ls", "/"], > > + stdout=subprocess.DEVNULL, > > stderr=subprocess.DEVNULL) > > +if check_perf_executability.returncode: > > + sys.exit( > > +""" > > +Error: > > +You may not have permission to collect stats. > > + > > +Consider tweaking /proc/sys/kernel/perf_event_paranoid, > > +which controls use of the performance events system by > > +unprivileged users (without CAP_SYS_ADMIN). 
> > + > > + -1: Allow use of (almost) all events by all users > > + Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK > > + 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN > > + Disallow raw tracepoint access by users without CAP_SYS_ADMIN > > + 1: Disallow CPU event access by users without CAP_SYS_ADMIN > > + 2: Disallow kernel profiling by users without CAP_SYS_ADMIN > > + > > +To make this setting permanent, edit /etc/sysctl.conf too, e.g.: > > + kernel.perf_event_paranoid = -1 > > +""" > > +) > > Very good. > > > + > > +# Run perf record > > +perf_record = subprocess.run((["perf", "record"] + command), > > + stdout=subprocess.DEVNULL, > > stderr=subprocess.PIPE) > > +if perf_record.returncode: > > + os.unlink('perf.data') > > + sys.exit(perf_record.stderr.decode("utf-8")) > > Here, the file "perf.data" will be created in the current working > directory. If one existed prior to script execution, it will be > overwritten. > > I think such "corruption" of the current working directory is not optimal. > It would be better if the script didn't touch the current working > directory at all (perhaps the user wants to keep a perf.data file obtained > from some experiment in the past). > > Therefore, I think it would be better if you specify the output of "perf > record" to be "/tmp/perf.data", not "perf.data", which is the default. > There is an option of "perf record" to specify the output file: > > -o, --output= > Output file name. > > > + > > +# Save perf report output to perf_report.out > > +with open("perf_report.out", "w") as output: > > + perf_report = subprocess.run( > > + ["perf", "report", "--stdio"], stdout=output, > > stderr=subprocess.PIPE) > > + if perf_report.returncode: > > + os.unlink('perf.data') > > + output.close() > > + os.unlink('perf_report.out') > > + sys.exit(perf_report.stderr.decode("utf-8")) > > For reasons similar to those described above, the input file should be > "/tmp/perf.data". 
Option of "perf report" for input file: > > -i, --input= > Input file name. > > Output file should be "/tmp/perf_report.out", not "perf_report.out". > > > + > > +# Read the reported data to functions[] > > +functions = [] > > +with open("perf_report.out", "r") as data: > > "/tmp/perf_report.out" > > > + # Only read lines that are not comments (comments start with #) > > + # Only read lines that are not empty > > + functions = [line for line in data.readlines() if line and line[0] > > + != '#' and line[0] != "\n"] > > + > > +# Limit the number of top functions to "top" > > +number_of_top_functions = top if len(functions) > top else len(functions) > > + > > +# Store the data of the top functions in top_functions[] > > +top_functions = functions[:number_of_top_functions] > > + > > +# Print table header > > +print('{:>4} {:>10} {:<30} {}\n{} {} {} {}'.format('No.', > > + 'Percentage', > > + 'Name', > > 'Function Name' would be more ergonomic here. > > > + 'Caller', > > Please replace 'Caller' with 'Invoked by'. 'Caller' implies a function > that directly calls the function in question. 'Invoked by' avoids such > confusion, and it just feels more appropriate here. > > > + '-' * 4, > > + '-' * 10, > > + '-' * 30, > > + '-' * 25)) > > + > > + > > +# Print top N functions > > +for (index, function) in enumerate(top_functions, start=1): > > + function_data = function.split() > > + function_percentage = function_data[0] > > + function_name = function_data[-1] > > + function_caller = ' '.join(function_data[2:-2]) > > function_invoker > > > + print('{:>4} {:>10} {:<30} {}'.format(index, > > + function_percentage, > > + function_name, > > + function_caller)) > > function_invoker > > > + > > +# Remove intermediate files > > +os.unlink('perf.data') > > +os.unlink('perf_report.out') > > os.unlink('/tmp/perf.data') > os.unlink('/tmp/perf_report.out') > > > > -- > > 2.17.1 > >
Thanks, Mr. Aleksandar. These are really valid points. I'll add these updates in v4 of this series. Best regards, Ahmed Karaman