Hello,
This patch enables interception of compiler calls without replacing
environment variables. It uses strace (v4.8) to determine each process's
root directory (in case of chroot), working directory, process name, and
environment variables, and tries to guess some header directories.
This approach works well with Android and with some build systems such as
OBS (local build with the osc client) and GBS (checked on Tizen). This
behaviour is enabled with the --use-interceptor option. If this option is
not specified, the old behaviour is used (the patch was written so as not
to break it).
The interceptor uses a process pool to analyze multiple files at a time.
The pool size can be specified with the NUM_PROCESSES environment variable.
The default pool size is the number of processors.
If we already have a trace of the build (which may be created with `strace -f -v -s
1000000 -o $trace_file_name -e
trace=vfork,fork,clone,execve,chdir,chroot -e signal= $build_cmd`), it
can be passed via the TRACE_FILE environment variable. In that case the
build itself is not launched (analysis only). If the variable is not set,
a temporary named pipe is used, and the analyzer and the build are
executed in parallel.
An -additional-arg option was also added to pass additional parameters to
the analyzer. Sometimes scan-build cannot determine all required parameters
when a custom toolchain is used.
The interceptor was tested on Ubuntu 12.04 and 14.04 with strace 4.8. Strace
v4.6 is known to have some issues with intercepting gcc.
--
Best regards,
Aleksei Sidorin
Software Engineer,
IMSWL-IMCG, SRR, Samsung Electronics
diff --git a/tools/scan-build/ccc-analyzer b/tools/scan-build/ccc-analyzer
index b5445e6..55ab64d 100755
--- a/tools/scan-build/ccc-analyzer
+++ b/tools/scan-build/ccc-analyzer
@@ -39,24 +39,29 @@ if (`uname -a` =~ m/Darwin/) {
$DefaultCXXCompiler = 'g++';
}
-if ($FindBin::Script =~ /c\+\+-analyzer/) {
- $Compiler = $ENV{'CCC_CXX'};
- if (!defined $Compiler || ! -x $Compiler) { $Compiler = $DefaultCXXCompiler; }
+$Compiler = $ENV{'COMPILER'};
+$Clang = $ENV{'CLANG'};
+if (!defined $Compiler) {
+ if ($FindBin::Script =~ /c\+\+-analyzer/) {
+ $Compiler = $ENV{'CCC_CXX'};
+ if (!defined $Compiler) { $Compiler = $DefaultCXXCompiler; }
- $Clang = $ENV{'CLANG_CXX'};
- if (!defined $Clang || ! -x $Clang) { $Clang = 'clang++'; }
+ $Clang = $ENV{'CLANG_CXX'};
+ if (!defined $Clang) { $Clang = 'clang++'; }
- $IsCXX = 1
-}
-else {
- $Compiler = $ENV{'CCC_CC'};
- if (!defined $Compiler || ! -x $Compiler) { $Compiler = $DefaultCCompiler; }
+ $IsCXX = 1
+ }
+ else {
+ $Compiler = $ENV{'CCC_CC'};
+ if (!defined $Compiler) { $Compiler = $DefaultCCompiler; }
- $Clang = $ENV{'CLANG'};
- if (!defined $Clang || ! -x $Clang) { $Clang = 'clang'; }
+ $Clang = $ENV{'CLANG'};
+ if (!defined $Clang) { $Clang = 'clang'; }
- $IsCXX = 0
+ $IsCXX = 0
+ }
}
+if (!defined $Clang) { $Clang = 'clang'; }
##===----------------------------------------------------------------------===##
# Cleanup.
@@ -362,6 +367,7 @@ my %CompilerLinkerOptionMap = (
my %IgnoredOptionMap = (
'-MT' => 1, # Ignore these preprocessor options.
'-MF' => 1,
+ '-mllvm' => 1,
'-fsyntax-only' => 0,
'-save-temps' => 0,
@@ -418,20 +424,27 @@ my %LangsAccepted = (
# Main Logic.
##----------------------------------------------------------------------------##
+
+my $IsIntercepted = $ENV{'IS_INTERCEPTED'};
+if (!defined $IsIntercepted) { $IsIntercepted = 0; }
my $Action = 'link';
+
my @CompileOpts;
my @LinkOpts;
my @Files;
my $Lang;
my $Output;
my %Uniqued;
+my $Status;
-# Forward arguments to gcc.
-my $Status = system($Compiler,@ARGV);
-if (defined $ENV{'CCC_ANALYZER_LOG'}) {
- print STDERR "$Compiler @ARGV\n";
+if ($IsIntercepted ne 'true') {
+ # Forward arguments to gcc.
+ $Status = system($Compiler,@ARGV);
+ if (defined $ENV{'CCC_ANALYZER_LOG'}) {
+ print STDERR "$Compiler @ARGV\n";
+ }
+ if ($Status) { exit($Status >> 8); }
}
-if ($Status) { exit($Status >> 8); }
# Get the analysis options.
my $Analyses = $ENV{'CCC_ANALYZER_ANALYSIS'};
@@ -448,6 +461,9 @@ my $ConstraintsModel = $ENV{'CCC_ANALYZER_CONSTRAINTS_MODEL'};
#Get the internal stats setting.
my $InternalStats = $ENV{'CCC_ANALYZER_INTERNAL_STATS'};
+#Get additional args for build (like defines, compiler options, etc)
+my $AdditionalArgs = $ENV{'CCC_ANALYZER_ADDITIONAL_ARGS'};
+
# Get the output format.
my $OutputFormat = $ENV{'CCC_ANALYZER_OUTPUT_FORMAT'};
if (!defined $OutputFormat) { $OutputFormat = "html"; }
@@ -488,6 +504,16 @@ foreach (my $i = 0; $i < scalar(@ARGV); ++$i) {
next;
}
+ # FIXME: need MUCH more precise detection, this is only stub
+ if ($Arg =~ /^-march/) {
+ if (`uname -a` =~ m/Linux/) {
+ if ($Arg =~ /armv7/) {
+ push @CompileOpts, "-target";
+ push @CompileOpts, "armv7-none-linux-androideabi";
+ }
+ }
+ }
+
# Options with possible arguments that should pass through to compiler.
if (defined $CompileOptionMap{$ArgKey}) {
my $Cnt = $CompileOptionMap{$ArgKey};
@@ -495,6 +521,7 @@ foreach (my $i = 0; $i < scalar(@ARGV); ++$i) {
while ($Cnt > 0) { ++$i; --$Cnt; push @CompileOpts, $ARGV[$i]; }
next;
}
+
# Handle the case where there isn't a space after -iquote
if ($Arg =~ /^-iquote.*/) {
push @CompileOpts,$Arg;
@@ -700,6 +727,7 @@ if ($Action eq 'compile' or $Action eq 'link') {
push @CmdArgs, @CompileOpts;
push @CmdArgs, $file;
+ push @CmdArgs, $AdditionalArgs;
if (scalar @Archs) {
foreach my $arch (@Archs) {
@@ -717,4 +745,4 @@ if ($Action eq 'compile' or $Action eq 'link') {
}
}
-exit($Status >> 8);
+exit 0
diff --git a/tools/scan-build/scan-build b/tools/scan-build/scan-build
index 31dbfb4..653a9a9 100755
--- a/tools/scan-build/scan-build
+++ b/tools/scan-build/scan-build
@@ -893,6 +893,7 @@ sub SetEnv {
foreach my $opt ('CCC_ANALYZER_STORE_MODEL',
'CCC_ANALYZER_PLUGINS',
'CCC_ANALYZER_INTERNAL_STATS',
+ 'CCC_ANALYZER_ADDITIONAL_ARGS',
'CCC_ANALYZER_OUTPUT_FORMAT') {
my $x = $Options->{$opt};
if (defined $x) { $ENV{$opt} = $x }
@@ -995,14 +996,20 @@ sub RunBuildCommand {
my $CCAnalyzer = shift;
my $CXXAnalyzer = shift;
my $Options = shift;
+ my $UseInterceptor = shift;
+
+ # Setup the environment.
+ SetEnv($Options);
+
+ if ($UseInterceptor) {
+ my $BuildCmd = join ' ', @$Args;
+ return (system("$RealBin/strace_interceptor.py $BuildCmd") >> 8);
+ }
if ($Cmd =~ /\bxcodebuild$/) {
return RunXcodebuild($Args, $IgnoreErrors, $CCAnalyzer, $CXXAnalyzer, $Options);
}
- # Setup the environment.
- SetEnv($Options);
-
if ($Cmd =~ /(.*\/?gcc[^\/]*$)/ or
$Cmd =~ /(.*\/?cc[^\/]*$)/ or
$Cmd =~ /(.*\/?llvm-gcc[^\/]*$)/ or
@@ -1348,6 +1355,8 @@ my $MaxLoop = 0;
my $RequestDisplayHelp = 0;
my $ForceDisplayHelp = 0;
my $AnalyzerDiscoveryMethod;
+my $UseInterceptor = 0;
+my @AdditionalArgs;
if (!@ARGV) {
$ForceDisplayHelp = 1
@@ -1371,6 +1380,12 @@ while (@ARGV) {
next;
}
+ if ($arg eq '--use-interceptor') {
+ shift @ARGV;
+ $UseInterceptor = 1;
+ next;
+ }
+
if ($arg eq "-o") {
shift @ARGV;
@@ -1525,6 +1540,11 @@ while (@ARGV) {
push @PluginsToLoad, "-load", shift @ARGV;
next;
}
+ if ($arg eq "-additional-arg") {
+ shift @ARGV;
+ push @AdditionalArgs, shift @ARGV;
+ next;
+ }
if ($arg eq "--use-analyzer") {
shift @ARGV;
$AnalyzerDiscoveryMethod = shift @ARGV;
@@ -1649,11 +1669,13 @@ if ($MaxLoop > 0) { push @AnalysesToRun, "-analyzer-max-loop $MaxLoop"; }
my $CCC_ANALYZER_ANALYSIS = join ' ',@AnalysesToRun;
my $CCC_ANALYZER_PLUGINS = join ' ',@PluginsToLoad;
my $CCC_ANALYZER_CONFIG = join ' ',@ConfigOptions;
+my $CCC_ANALYZER_ADDITIONAL_ARGS = join ' ',@AdditionalArgs;
my %Options = (
'CC' => $Cmd,
'CXX' => $CmdCXX,
'CLANG' => $Clang,
'CLANG_CXX' => $ClangCXX,
+ 'CCC_ANALYZER_ADDITIONAL_ARGS' => $CCC_ANALYZER_ADDITIONAL_ARGS,
'VERBOSE' => $Verbose,
'CCC_ANALYZER_ANALYSIS' => $CCC_ANALYZER_ANALYSIS,
'CCC_ANALYZER_PLUGINS' => $CCC_ANALYZER_PLUGINS,
@@ -1676,7 +1698,7 @@ if (defined $OutputFormat) {
# Run the build.
my $ExitStatus = RunBuildCommand(\@ARGV, $IgnoreErrors, $Cmd, $CmdCXX,
- \%Options);
+ \%Options, $UseInterceptor);
if (defined $OutputFormat) {
if ($OutputFormat =~ /plist/) {
diff --git a/tools/scan-build/strace_interceptor.py b/tools/scan-build/strace_interceptor.py
new file mode 100644
index 0000000..5974cea
--- /dev/null
+++ b/tools/scan-build/strace_interceptor.py
@@ -0,0 +1,250 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+import re
+import os
+import sys
+import random
+import subprocess
+import multiprocessing
+
+from collections import namedtuple
+ProcessParams = namedtuple("ProcessParams", "root_dir work_dir")
+LaunchParams = namedtuple("LaunchParams", "exec_file exec_args")
+
+# execve strace output looks like:
+# PID Syscall_name("Syscall_cmd", ["Syscall_args_in_quotes_splitted_with_{, }"]
+# PID Syscall name Process name Argument string Environment variables
+# "(\d+) ......(\w+).....\(\"([\w/\\\]+)\",... \[(.*)\], \[(.*)\]"
+strace_execve_pattern = re.compile("(\d+) +(\w+)\((\"[\S]+\"), \[(.*)\], \[(.*)\]")
+
+# Only return value (pid) is interesting for us
+strace_fork_pattern = re.compile(".*?(\d+)$")
+
+# Select string (path) in brackets
+strace_chroot_chdir_pattern = re.compile("\d+ +\w+\(\"(.*)\"")
+
+# Select pid and system call name
+strace_main_pattern = re.compile("^(\d+) +([\+\w]+)")
+
+# Select pid
+strace_exit_pattern = re.compile("(\d+) +\+\+\+ exited")
+
+# Select name, pid and return result of resumed syscall
+strace_resumed_pattern = re.compile("(\d+) +<\.\.\. (.*) resumed>.*= (\d+)")
+
+# We can set regexp for compiler detection via environment variable
+compiler_regexp_str = os.environ.get('COMPILER_REGEXP',
+# "^\"(?:.*/)?(?:cc1(?:plus)?|clang)\"$")
+ "^\"(?:.*/)?(?:cc|(?:\S+-)?(?:gcc|g\+\+)|clang(?:\+\+)?)(?:-[\d\.]+)?\"$")
+compiler_regexp = re.compile(compiler_regexp_str)
+
+compiler_include_regexp = re.compile("(-[IL])([\S]+)")
+
+script_dir = os.path.dirname(__file__)
+
+need_debug = os.environ.get('INTERCEPTOR_DEBUG')
+debug_file = os.environ.get('INTERCEPTOR_DEBUG_FILE')
+
+unprocessed_list = dict()
+exec_interrupted_list = dict()
+
+def debug_print(str) :
+ if need_debug :
+ if debug_file :
+ with open(debug_file, 'a') as debug_out :
+ debug_out.write('%s\n' % str)
+ else :
+ print(str)
+
+# The Android build system uses scripts with the same names as compilers.
+# We just ignore them and intercept binary executables only.
+def is_script(path) :
+ file = open(path, "r")
+ line = file.readline()
+ file.close()
+ return line.startswith("#!")
+
+def start_analyzer(cmd, env) :
+ os.environ.update(env)
+ return os.system(cmd)
+
+analyzer_process_pool = multiprocessing.Pool(processes = int(os.environ.get('NUM_PROCESSES',
+ multiprocessing.cpu_count())))
+
+def add_root(path_str, root_dir) :
+ if path_str.startswith("/") :
+ return root_dir + "/" + path_str
+ is_include = compiler_include_regexp.search(path_str)
+ if is_include :
+ return is_include.groups()[0] + add_root(is_include.groups()[1], root_dir)
+ return path_str
+
+def analyzer_cmd(root_dir, work_dir, exec_file, exec_args) :
+ arg_list = [ ("\"" + add_root(arg[1:], root_dir)) \
+ for arg in exec_args.split(", ")[1:] ]
+ # Try to guess some additional includes
+ if (exec_file.startswith("\"")) :
+ exec_file = exec_file[1:-1]
+ sysroot = os.path.dirname(os.path.dirname(exec_file)) + "/sysroot"
+ if (os.path.exists(sysroot)) :
+ sysroot = add_root(sysroot, root_dir)
+ else :
+ sysroot = root_dir
+ exec_file = add_root(exec_file, root_dir)
+ if is_script(exec_file) :
+ return ""
+ return "cd %s/%s && IS_INTERCEPTED=true COMPILER=\"%s\" %s/ccc-analyzer %s" % \
+ (root_dir, work_dir, exec_file, script_dir, \
+ ' '.join(arg_list + ["-g", "-I\"%s/usr/include\"" % sysroot]))
+
+# Parsing functions
+# ------------------------------------------------------------------------------
+def parse_exec(pid, process_set, strace_out) :
+ exec_args = strace_execve_pattern.search(strace_out).groups()[2:]
+ exec_file = exec_args[0]
+ if compiler_regexp.match(exec_file) :
+ if strace_out.endswith("= 0\n") or strace_out.endswith("<unfinished ...>\n") :
+ if pid in process_set :
+ cmd = analyzer_cmd(process_set[pid].root_dir,
+ process_set[pid].work_dir,
+ exec_file, exec_args[1])
+ env_list = [ arg[1:-1] for arg in exec_args[2].split(", ") ]
+ env_dict = dict();
+ for env in env_list :
+ if env.find('=') != -1 :
+ key, value = env.split("=", 1)
+ env_dict[key] = value
+ debug_print('Environment: %s' % env_dict)
+ debug_print('Command: %s' % cmd)
+ if not cmd :
+ debug_print('Compiler cmd is a script')
+ elif cmd.find('conftest.c') == -1 : # Not a 'configure'
+ analyzer_process_pool.apply_async(start_analyzer, [cmd, env_dict])
+ else :
+ debug_print('conftest found, omitting analysis')
+ else :
+ waiting_vfork_list[pid] = LaunchParams(exec_file, exec_args[1])
+
+def resume_exec(pid, process_set, strace_out):
+ if strace_out.endswith(" = 0\n") :
+ parse_exec(pid, process_set, exec_interrupted_list[pid])
+ else :
+ del process_set[pid] # does not need to be tracked
+ del exec_interrupted_list[pid]
+
+def parse_fork(pid, process_set, strace_out) :
+ match = strace_fork_pattern.search(strace_out)
+ if match :
+ newpid = match.groups()[0]
+ process_set[newpid] = ProcessParams(process_set[pid].root_dir,
+ process_set[pid].work_dir)
+ if newpid in unprocessed_list :
+ for line in unprocessed_list[newpid] :
+ parse_input_string(process_set, line)
+ del unprocessed_list[newpid]
+
+def resume_fork(pid, process_set, retval):
+ process_set[retval] = ProcessParams(process_set[pid].root_dir,
+ process_set[pid].work_dir)
+ if retval in unprocessed_list :
+ for line in unprocessed_list[retval] :
+ parse_input_string(process_set, line)
+ del unprocessed_list[retval]
+
+# Model 'cd' cmd
+def changedir(old_dir_path, new_dir_path) :
+ if new_dir_path.startswith("/") :
+ return new_dir_path
+ return old_dir_path + "/" + new_dir_path
+
+# Changes work_dir of calling process
+def parse_chdir(pid, process_set, strace_out) :
+ new_workdir = strace_chroot_chdir_pattern.search(strace_out).groups()[0]
+ process_set[pid] = ProcessParams(process_set[pid].root_dir,
+ changedir(process_set[pid].work_dir, new_workdir))
+
+# Changes root_dir of calling process
+def parse_chroot(pid, process_set, strace_out) :
+ new_rootdir = strace_chroot_chdir_pattern.search(strace_out).groups()[0]
+ process_set[pid] = ProcessParams(changedir(process_set[pid].root_dir,
+ new_rootdir), process_set[pid].work_dir)
+
+def parse_exit(pid, process_set):
+ del process_set[pid]
+
+def parse_input_string(process_set, strace_out) :
+ is_resumed = strace_resumed_pattern.search(strace_out)
+ is_success = strace_out.endswith("= 0\n");
+ if is_resumed : # syscall resumed
+ pid, syscall, retval = is_resumed.groups()
+ if pid in process_set :
+ if syscall == "execve" :
+ resume_exec(pid, process_set, strace_out)
+ elif syscall == "clone" or syscall == "vfork" :
+ # All necessary information can be taken from the resuming message
+ resume_fork(pid, process_set, retval)
+ else :
+ if pid in unprocessed_list :
+ unprocessed_list[pid] += [strace_out]
+ else :
+ unprocessed_list[pid] = [strace_out]
+ else :
+ search_res = strace_main_pattern.search(strace_out)
+ if not search_res :
+ return
+ pid, syscall = search_res.groups()
+ if pid in process_set :
+ if syscall == "execve" :
+ if strace_out.endswith("<unfinished ...>\n") :
+ # Keep strace string to analyze it when syscall is finished
+ exec_interrupted_list[pid] = strace_out
+ elif is_success :
+ parse_exec(pid, process_set, strace_out)
+ elif syscall == "clone" or syscall == "vfork" :
+ parse_fork(pid, process_set, strace_out)
+ elif syscall == "chdir" :
+ parse_chdir(pid, process_set, strace_out)
+ elif syscall == "chroot" :
+ parse_chroot(pid, process_set, strace_out)
+ elif syscall == "+++" : # message like '28845 +++ exited with 0 +++'
+ parse_exit(pid, process_set)
+ else :
+ if pid in unprocessed_list :
+ unprocessed_list[pid] += [strace_out]
+ else :
+ unprocessed_list[pid] = [strace_out]
+
+# ------------------------------------------------------------------------------
+process_set = dict()
+
+env_trace_file_name = os.environ.get('TRACE_FILE')
+if not env_trace_file_name :
+ trace_file_name = "/tmp/strace%d" % random.randint(0, sys.maxint)
+ os.mkfifo(trace_file_name)
+ strace_args = ["strace", "-f", "-v", "-s", "1000000", "-o", trace_file_name, "-e",
+ "trace=vfork,fork,clone,execve,chdir,chroot", "-e", "signal="] +\
+ sys.argv[1:]
+ subprocess.Popen(strace_args)
+else :
+ trace_file_name = env_trace_file_name
+
+strace_out_file = open(trace_file_name, "r")
+line = strace_out_file.readline()
+pid, syscall = strace_main_pattern.search(line).groups()
+
+# 1st syscall should be execve
+if syscall != "execve" :
+ print("Oops...\n")
+ exit(1)
+process_set[pid] = ProcessParams("/", os.getcwd())
+
+while line :
+ parse_input_string(process_set, line)
+ line = strace_out_file.readline()
+
+analyzer_process_pool.close()
+analyzer_process_pool.join()
+strace_out_file.close()
+if not env_trace_file_name :
+ os.remove(trace_file_name)
_______________________________________________
cfe-commits mailing list
cfe-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits