Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ctu 2nd run #87

Open
wants to merge 27 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
e3be130
CTU basic implementation
Jun 1, 2017
665b205
Handle filenames with spaces
Jun 1, 2017
4bcb040
Fixing parameter related bugs
Jun 1, 2017
7a10e08
Better parameter for addressing analyzer directly
Jun 1, 2017
41302f0
Add plist-multi-file output format
Jun 1, 2017
c382f24
Merge branch 'master' into ctu
Jun 1, 2017
fffd8ec
Add CTU test case
Jun 2, 2017
af4fe81
Merge branch 'master' into ctu
Jun 14, 2017
0bc1a81
Make it python 3 compatible
Jun 14, 2017
f5cc766
More robust help and options
Jun 16, 2017
8475aea
Better naming for argparse variable
Jun 16, 2017
ef7ea79
Use namedtuple for ctu configuration
Jun 16, 2017
9a81c51
prefix_with unittest
Jun 16, 2017
105da91
Refactor function map merging
Jun 16, 2017
de39962
Unit tests for merging CTU maps
Jun 19, 2017
b2312e7
Refactor run_analyzer_parallel to have ctu logic separated
Jun 19, 2017
453c451
Move triple arch extraction into clang module
Jun 19, 2017
580a823
Use built-in encoding
Jun 19, 2017
b459743
Add extra test for spaces in filenames
Jun 19, 2017
db9bc40
Function map generation refactor and testing
Jun 19, 2017
13ebac8
Add function map generator detection
Jun 23, 2017
89f5571
Functional CTU test passes on CTU not supported
Jun 26, 2017
b4c252f
Merge branch 'master' into ctu
Jun 27, 2017
367614f
Make path handling windows compatible
Jun 28, 2017
3ab8c19
Use abspath for ctu-dir on dir hopping projects
Jun 30, 2017
52071a2
Protect parser libs from bogus reports
Jun 30, 2017
0b3dcdb
Merge branch 'master' into ctu
Dec 6, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions libscanbuild/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@

Execution = collections.namedtuple('Execution', ['pid', 'cwd', 'cmd'])

# CTU configuration record. 'collect' and 'analyze' are the phase switches,
# 'dir' is the directory where CTU data is stored and 'func_map_cmd' is the
# command used to generate the function maps.
CtuConfig = collections.namedtuple('CtuConfig', ['collect', 'analyze', 'dir',
'func_map_cmd'])


def shell_split(string):
# type: (str) -> List[str]
Expand Down
239 changes: 221 additions & 18 deletions libscanbuild/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,20 @@
import platform
import contextlib
import datetime
import shutil
import glob
import argparse # noqa: ignore=F401
from typing import Any, Dict, List, Callable, Iterable, Generator # noqa: ignore=F401

from libscanbuild import command_entry_point, wrapper_entry_point, \
wrapper_environment, run_build, run_command
wrapper_environment, run_build, run_command, CtuConfig
from libscanbuild.arguments import parse_args_for_scan_build, \
parse_args_for_analyze_build
from libscanbuild.intercept import capture
from libscanbuild.report import document
from libscanbuild.compilation import Compilation, classify_source, \
CompilationDatabase
from libscanbuild.clang import get_version, get_arguments
from libscanbuild.clang import get_version, get_arguments, get_triple_arch
from libscanbuild import Execution # noqa: ignore=F401

__all__ = ['scan_build', 'analyze_build', 'analyze_compiler_wrapper']
Expand All @@ -43,6 +45,9 @@
COMPILER_WRAPPER_CXX = 'analyze-c++'
ENVIRONMENT_KEY = 'ANALYZE_BUILD'

# Name of the merged (global) function map file written into the CTU directory.
CTU_FUNCTION_MAP_FILENAME = 'externalFnMap.txt'
# Sub-directory of the CTU directory that holds the individual function map
# files until they are merged into the global one.
CTU_TEMP_FNMAP_FOLDER = 'tmpExternalFnMaps'


@command_entry_point
def scan_build():
Expand All @@ -61,7 +66,7 @@ def scan_build():
exit_code, compilations = capture(args)
if need_analyzer(args.build):
# run the analyzer against the captured commands
run_analyzer_parallel(compilations, args)
run_analyzer_with_ctu(compilations, args)
else:
# run build command and analyzer with compiler wrappers
environment = setup_environment(args)
Expand All @@ -82,7 +87,7 @@ def analyze_build():
with report_directory(args.output, args.keep_empty) as args.output:
# run the analyzer against a compilation db
compilations = CompilationDatabase.load(args.cdb)
run_analyzer_parallel(compilations, args)
run_analyzer_with_ctu(compilations, args)
# cover report generation and bug counting
number_of_bugs = document(args)
# set exit status as it was requested
Expand All @@ -103,22 +108,35 @@ def need_analyzer(args):
return len(args) > 0 and not re.search('configure|autogen', args[0])


def prefix_with(constant, pieces):
    # type: (Any, List[Any]) -> List[Any]
    """ Interleave a fixed value in front of every element of a sequence.

    The result is twice as long as the input: each element of `pieces` is
    kept in its original order and is directly preceded by `constant`.

    eg.: prefix_with(0, [1,2,3]) creates [0, 1, 0, 2, 0, 3] """

    interleaved = []
    for piece in pieces:
        interleaved.extend((constant, piece))
    return interleaved


def get_ctu_config(args):
    """ Build the CTU configuration from the parsed command line arguments.

    The phase switches are taken from `args.ctu_phases`, the directory from
    `args.ctu_dir` and the function map command from `args.func_map_cmd`.
    When `args` has no `ctu_phases` attribute (or that attribute has no `dir`
    attribute) a configuration with both phases switched off is returned. """

    ctu_available = hasattr(args, 'ctu_phases') and \
        hasattr(args.ctu_phases, 'dir')
    if not ctu_available:
        return CtuConfig(collect=False, analyze=False, dir='',
                         func_map_cmd='')

    phases = args.ctu_phases
    return CtuConfig(collect=phases.collect,
                     analyze=phases.analyze,
                     dir=args.ctu_dir,
                     func_map_cmd=args.func_map_cmd)


def analyze_parameters(args):
# type: (argparse.Namespace) -> Dict[str, Any]
""" Mapping between the command line parameters and the analyzer run
method. The run method works with a plain dictionary, while the command
line parameters are in a named tuple.
The keys are very similar, and some values are preprocessed. """

def prefix_with(constant, pieces):
# type: (Any, List[Any]) -> List[Any]
""" From a sequence create another sequence where every second element
is from the original sequence and the odd elements are the prefix.

eg.: prefix_with(0, [1,2,3]) creates [0, 1, 0, 2, 0, 3] """

return [elem for piece in pieces for elem in [constant, piece]]

def direct_args(args):
# type: (argparse.Namespace) -> List[str]
""" A group of command line arguments can mapped to command
Expand Down Expand Up @@ -165,10 +183,73 @@ def direct_args(args):
'output_failures': args.output_failures,
'direct_args': direct_args(args),
'force_debug': args.force_debug,
'excludes': args.excludes
'excludes': args.excludes,
'ctu': get_ctu_config(args)
}


def create_global_ctu_function_map(func_map_lines):
    """ Takes iterator of individual function maps and creates a global map
    keeping only unique names. We leave conflicting names out of CTU.
    A function map contains the id of a function (mangled name) and the
    originating source (the corresponding AST file) name.

    :param func_map_lines: iterable of text lines, each of the form
                           '<mangled name> <ast file>'. Blank lines are
                           ignored.
    :return: list of (mangled name, ast file) tuples for every name that
             maps to exactly one AST file.
    :raises ValueError: when a non-blank line has no space separator. """

    mangled_to_asts = {}

    for line in func_map_lines:
        entry = line.strip()
        if not entry:
            # A blank line carries no mapping; skipping it avoids failing
            # on the tuple unpacking below.
            continue
        # Only the first space separates the fields, so AST file names may
        # themselves contain spaces.
        mangled_name, ast_file = entry.split(' ', 1)
        # We collect all occurrences of a function name into a set
        mangled_to_asts.setdefault(mangled_name, set()).add(ast_file)

    # Names defined in more than one AST file are ambiguous and left out.
    return [(mangled_name, next(iter(ast_files)))
            for mangled_name, ast_files in mangled_to_asts.items()
            if len(ast_files) == 1]


def merge_ctu_func_maps(ctudir):
    """ Combine the individual function maps into the global map file.

    During the collect phase every compilation unit gets its own function
    map file inside the CTU_TEMP_FNMAP_FOLDER sub-directory of `ctudir`
    (the phase runs in parallel, hence the separate files). Each line of
    those files pairs the mangled name of a function with the AST file
    generated from the source which had it.
    This function reads all of them, keeps the unambiguous entries and
    writes the result into CTU_FUNCTION_MAP_FILENAME inside `ctudir`.
    The temporary folder is deleted afterwards. """

    def read_partial_maps(directory):
        """ Yield every line of every file in the directory; the order of
        the files is whatever glob returns. """

        for path in glob.glob(os.path.join(directory, '*')):
            with open(path, 'r') as handle:
                for entry in handle:
                    yield entry

    partial_maps_dir = os.path.join(ctudir, CTU_TEMP_FNMAP_FOLDER)
    unique_pairs = create_global_ctu_function_map(
        read_partial_maps(partial_maps_dir))

    # Write (mangled function name, ast file) pairs into the final file.
    global_map_path = os.path.join(ctudir, CTU_FUNCTION_MAP_FILENAME)
    with open(global_map_path, 'w') as global_map:
        global_map.writelines('%s %s\n' % (name, ast)
                              for name, ast in unique_pairs)

    # The per-unit maps are not needed once the global map is written.
    shutil.rmtree(partial_maps_dir, ignore_errors=True)


def run_analyzer_parallel(compilations, args):
# type: (Iterable[Compilation], argparse.Namespace) -> None
""" Runs the analyzer against the given compilations. """
Expand All @@ -185,6 +266,32 @@ def run_analyzer_parallel(compilations, args):
pool.join()


def run_analyzer_with_ctu(compilations, args):
    """ Drive the analyzer: one pass normally, two passes for full CTU.

    - When the collect phase is requested, the CTU directory is recreated
      from scratch together with the temporary function map folder.
    - When only one phase (or none) is requested, the analyzer runs once and,
      after a collect phase, the function maps are merged.
    - When both phases are requested, a collect-only pass is run, the
      function maps are merged, an analyze-only pass is run and finally the
      CTU directory is removed. """

    config = get_ctu_config(args)
    ctu_dir = config.dir

    if config.collect:
        shutil.rmtree(ctu_dir, ignore_errors=True)
        os.makedirs(os.path.join(ctu_dir, CTU_TEMP_FNMAP_FOLDER))

    if not (config.collect and config.analyze):
        # Single pass: plain analysis or exactly one CTU phase.
        run_analyzer_parallel(compilations, args)
        if config.collect:
            merge_ctu_func_maps(ctu_dir)
        return

    # compilations is a generator but we want to do 2 CTU rounds
    units = list(compilations)

    # CTU strings are coming from args.ctu_dir and func_map_cmd,
    # so we can leave it empty
    args.ctu_phases = CtuConfig(collect=True, analyze=False,
                                dir='', func_map_cmd='')
    run_analyzer_parallel(units, args)
    merge_ctu_func_maps(ctu_dir)

    args.ctu_phases = CtuConfig(collect=False, analyze=True,
                                dir='', func_map_cmd='')
    run_analyzer_parallel(units, args)
    shutil.rmtree(ctu_dir, ignore_errors=True)


def setup_environment(args):
# type: (argparse.Namespace) -> Dict[str, str]
""" Set up environment for build command to interpose compiler wrapper. """
Expand Down Expand Up @@ -287,9 +394,9 @@ def wrapper(*args, **kwargs):
'excludes', # list of directories
'force_debug', # kill non debug macros
'output_dir', # where generated report files shall go
'output_format', # it's 'plist', 'html', 'plist-html',
# 'text' or 'plist-multi-file'
'output_failures']) # generate crash reports or not
'output_format', # it's 'plist', 'html', both or plist-multi-file
'output_failures', # generate crash reports or not
'ctu']) # ctu control options
def run(opts):
# type: (Dict[str, Any]) -> Dict[str, Any]
""" Entry point to run (or not) static analyzer against a single entry
Expand Down Expand Up @@ -416,8 +523,104 @@ def target():
return result


def func_map_list_src_to_ast(func_src_list, triple_arch):
    """ Turns textual function map list with source files into a
    function map list with ast files.

    :param func_src_list: iterable of '<mangled name> <source path>' strings.
    :param triple_arch: architecture name; it is appended to the mangled
                        name (after '@') and used as a directory level of
                        the AST path.
    :return: list of '<mangled name>@<triple_arch> <ast path>' strings,
             where the AST path is 'ast/<triple_arch>/<source path>.ast'
             made relative. Entries without a name/path separator or with
             an empty path are skipped. """

    func_ast_list = []
    for fn_src_txt in func_src_list:
        # Split on the first space only: the path may contain spaces.
        mangled_name, separator, path = fn_src_txt.partition(" ")
        # Normalize path on windows as well
        path = os.path.splitdrive(path)[1]
        if not separator or not path:
            # Without this guard a missing separator would produce a
            # truncated name and an empty path would raise IndexError.
            logging.debug("Skipping malformed function map entry '%s'",
                          fn_src_txt)
            continue
        # Make relative path out of absolute
        if path.startswith(os.sep):
            path = path[1:]
        ast_path = os.path.join("ast", triple_arch, path + ".ast")
        func_ast_list.append(mangled_name + "@" + triple_arch + " " + ast_path)
    return func_ast_list


@require(['clang', 'directory', 'flags', 'direct_args', 'source', 'ctu'])
def ctu_collect_phase(opts):
    """ Preprocess source by generating all data needed by CTU analysis.

    For the compilation described by `opts` this generates the AST file
    under '<ctu dir>/ast/<triple arch>/' and writes the function map of the
    source into a new temporary file in the CTU_TEMP_FNMAP_FOLDER folder
    of the CTU directory.

    :param opts: dictionary with the 'clang', 'directory', 'flags',
                 'direct_args', 'source' and 'ctu' keys. """

    def generate_ast(triple_arch):
        """ Generates ASTs for the current compilation command. """

        args = opts['direct_args'] + opts['flags']
        # Make the source path relative the same way as
        # func_map_list_src_to_ast does (drop the drive, then the leading
        # separator), so the generated file matches the function map entry.
        source_path = os.path.splitdrive(
            os.path.realpath(opts['source']))[1]
        if source_path.startswith(os.sep):
            source_path = source_path[1:]
        ast_joined_path = os.path.join(opts['ctu'].dir, 'ast', triple_arch,
                                       source_path + '.ast')
        ast_path = os.path.abspath(ast_joined_path)
        ast_dir = os.path.dirname(ast_path)
        if not os.path.isdir(ast_dir):
            try:
                os.makedirs(ast_dir)
            except OSError:
                # The collect phase runs in parallel: another worker may
                # have created the directory in the meantime.
                if not os.path.isdir(ast_dir):
                    raise
        ast_command = [opts['clang'], '-emit-ast']
        ast_command.extend(args)
        # '-w' is passed to the compiler when emitting the AST.
        ast_command.append('-w')
        ast_command.append(opts['source'])
        ast_command.append('-o')
        ast_command.append(ast_path)
        logging.debug("Generating AST using '%s'", ast_command)
        run_command(ast_command, cwd=opts['directory'])

    def map_functions(triple_arch):
        """ Generate function map file for the current source. """

        args = opts['direct_args'] + opts['flags']
        funcmap_command = [opts['ctu'].func_map_cmd]
        funcmap_command.append(opts['source'])
        funcmap_command.append('--')
        funcmap_command.extend(args)
        logging.debug("Generating function map using '%s'", funcmap_command)
        func_src_list = run_command(funcmap_command, cwd=opts['directory'])
        func_ast_list = func_map_list_src_to_ast(func_src_list, triple_arch)
        extern_fns_map_folder = os.path.join(opts['ctu'].dir,
                                             CTU_TEMP_FNMAP_FOLDER)
        if func_ast_list:
            # Every compilation writes its own uniquely named file, which is
            # kept (delete=False) so it can be merged later.
            with tempfile.NamedTemporaryFile(mode='w',
                                             dir=extern_fns_map_folder,
                                             delete=False) as out_file:
                out_file.write("\n".join(func_ast_list) + "\n")

    cwd = opts['directory']
    cmd = [opts['clang'], '--analyze'] + opts['direct_args'] + opts['flags'] \
        + [opts['source']]
    triple_arch = get_triple_arch(cmd, cwd)
    generate_ast(triple_arch)
    map_functions(triple_arch)


@require(['ctu'])
def dispatch_ctu(opts, continuation=run_analyzer):
    """ Route the analysis to the requested CTU phase, if there is one.

    - collect phase: the CTU data is generated and the continuation is
      not called.
    - analyze phase: the CTU related analyzer options are appended to
      `opts['direct_args']` before the continuation is called.
    - neither: the continuation is called with the options untouched. """

    config = opts['ctu']
    # Recover namedtuple from json when coming from analyze_cc
    if not hasattr(config, 'collect'):
        config = CtuConfig(collect=config[0],
                           analyze=config[1],
                           dir=config[2],
                           func_map_cmd=config[3])
        opts['ctu'] = config

    collecting, analyzing = config.collect, config.analyze

    if collecting or analyzing:
        # A single invocation handles exactly one of the two phases.
        assert collecting != analyzing

    if collecting:
        return ctu_collect_phase(opts)

    if analyzing:
        analyzer_config = prefix_with(
            '-analyzer-config',
            ['ctu-dir=' + config.dir, 'reanalyze-ctu-visited=true'])
        opts['direct_args'].extend(prefix_with('-Xanalyzer', analyzer_config))

    return continuation(opts)


@require(['flags', 'force_debug'])
def filter_debug_flags(opts, continuation=run_analyzer):
def filter_debug_flags(opts, continuation=dispatch_ctu):
# type: (...) -> Dict[str, Any]
""" Filter out nondebug macros when requested. """

Expand Down
Loading