diff --git a/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json b/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json new file mode 100644 index 000000000..8a97510dd --- /dev/null +++ b/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json @@ -0,0 +1,114 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + }, + "malloc": { + "wrappers": [ + "Malloc", + "Realloc", + "Malloc2", + "Realloc2", + "ARR_CreateInstance", + "realloc_array", + "ARR_GetNewElement" + ] + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": true + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file diff --git a/scripts/incremental/benchmarking/conf/chrony.json b/scripts/incremental/benchmarking/conf/chrony.json new file mode 100644 index 000000000..a2fe392e4 --- /dev/null +++ b/scripts/incremental/benchmarking/conf/chrony.json @@ -0,0 +1,114 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + }, + "malloc": { + "wrappers": [ + "Malloc", + "Realloc", + "Malloc2", + "Realloc2", + "ARR_CreateInstance", + "realloc_array", + "ARR_GetNewElement" + ] + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": false + }, + "restart": { + "sided": { + "enabled": false 
+ }, + "write-only": true + } + } +} \ No newline at end of file diff --git a/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json b/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json new file mode 100644 index 000000000..46ad26fce --- /dev/null +++ b/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json @@ -0,0 +1,103 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": true + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file diff --git a/scripts/incremental/benchmarking/conf/figlet.json b/scripts/incremental/benchmarking/conf/figlet.json new file mode 100644 index 000000000..3e80b8ffe --- /dev/null +++ b/scripts/incremental/benchmarking/conf/figlet.json @@ -0,0 +1,103 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": false + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file diff --git a/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json b/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json new file mode 100644 index 
000000000..dbe858b98 --- /dev/null +++ b/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json @@ -0,0 +1,122 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "malloc": { + "wrappers": [ + "ZSTD_customMalloc", + "ZSTD_customCalloc" + ] + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true, + "extraspecials": [ + "ZSTD_customMalloc", + "ZSTD_customCalloc", + "ZSTD_customFree" + ] + }, + "pre": { + "cppflags": [ + "-DZSTD_NO_INTRINSICS", + "-D_FORTIFY_SOURCE=0", + "-DGOBLINT_NO_ASSERT", + "-DGOBLINT_NO_BSEARCH" + ] + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": true + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file diff --git a/scripts/incremental/benchmarking/conf/zstd-race.json b/scripts/incremental/benchmarking/conf/zstd-race.json new file mode 100644 index 000000000..b3c4a49ac --- /dev/null +++ b/scripts/incremental/benchmarking/conf/zstd-race.json @@ -0,0 +1,122 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "malloc": { + "wrappers": [ + "ZSTD_customMalloc", + "ZSTD_customCalloc" + ] + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true, + "extraspecials": [ + "ZSTD_customMalloc", + "ZSTD_customCalloc", + "ZSTD_customFree" + ] + }, + "pre": { + "cppflags": [ + "-DZSTD_NO_INTRINSICS", + "-D_FORTIFY_SOURCE=0", + "-DGOBLINT_NO_ASSERT", + "-DGOBLINT_NO_BSEARCH" + ] + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": 
false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": false + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py new file mode 100644 index 000000000..873d00e62 --- /dev/null +++ b/scripts/incremental/benchmarking/efficiency.py @@ -0,0 +1,297 @@ +from pydriller import Repository, Git +import utils +import psutil +import multiprocessing as mp +import os +import subprocess +import itertools +import shutil +import json +from datetime import datetime +import sys +import pandas as pd + +# Some basic settings for the different projects (currently zstd, sqlite) +import projects + +################################################################################ +# Usage: python3 incremental_smallcommits.py +# Executing the script will overwrite the directory 'result_efficiency' in the cwd. +# The script for building the compilation database is assumed to be found in the analyzers script directory and the +# config file is assumed to be found in the conf directory of the analyzers repository. +# The single test runs are mapped to processors according to the coremapping. The one specified in the section below +# should work for Intel machines, otherwise you might need to adapt it according to the description. +usage = "Use script like this: python3 efficiency.py " +if len(sys.argv) != 4: + print("Wrong number of parameters.\n" + usage) + exit() + +# Load some project dependent settings: +project = projects.projects.get(sys.argv[2]) +if project == None: + print("Given Project \"" + sys.argv[2] + "\" is not one of the supported projects. Add a new project by modifying projects.py.\n" + usage) + exit() + +url = project.url +repo_name = project.repo_name +build_compdb = project.build_compdb +conf_base = project.conf_base +conf_base = os.path.join(os.getcwd(), conf_base + ".json") +conf_incrpost = project.conf_incrpost +conf_incrpost = os.path.join(os.getcwd(), conf_incrpost + ".json") +begin = project.begin +to = project.to +files = project.files +diff_exclude = project.diff_exclude + +# Project independent settings +result_dir = os.path.join(os.getcwd(), 'result_efficiency') +maxCLOC = None # was 50; can be deactivated with None +analyzer_dir = sys.argv[1] +only_collect_results = False # can be turned on to collect results, if data collection was aborted before the creation of result tables +################################################################################ +try: + numcores = int(sys.argv[3]) +except ValueError: + print("Parameter should be a number.\nUse script like this:" + usage) + exit() +avail_phys_cores = psutil.cpu_count(logical=False) +allowedcores = avail_phys_cores - 1 +if not only_collect_results and numcores > allowedcores: + print("Not enough physical cores on this machine (exist: ", avail_phys_cores, " allowed: ", allowedcores, ")") + exit() +# For equal load distribution, choose a processes to core mapping, +# use only physical cores and have an equal number of processes per cache. +# The layout of physical/logical cores and sharing of caches is machine dependent. To find out use: 'lscpu --all --extended'. 
+# For our test server: +coremapping1 = [i for i in range(numcores - numcores//2)] +coremapping2 = [i for i in range(avail_phys_cores//2, avail_phys_cores//2 + numcores//2)] +coremapping = [coremapping1[i//2] if i%2==0 else coremapping2[i//2] for i in range(len(coremapping1) + len(coremapping2))] +################################################################################ + +def filter_commits_false_pred(repo_path): + def pred(c): + relCLOC = utils.calculateRelCLOC(repo_path, c, diff_exclude) + return relCLOC == 0 or (maxCLOC is not None and relCLOC > maxCLOC) + return pred + +def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): + count_analyzed = 0 + count_skipped = 0 + count_failed = 0 + analyzed_commits = {} + repo_path = os.path.join(cwd, repo_name) + + for commit in itertools.islice(itertools.filterfalse(filter_commits_false_pred(repo_path), Repository(url, since=begin, to=to, only_no_merge=True, clone_repo_to=cwd).traverse_commits()), from_c, to_c): + gr = Git(repo_path) + + #print("\n" + commit.hash) + #print('changed LOC: ', commit.lines) + #print('merge commit: ', commit.merge) + + # skip commits that have no relevant code changes or more than maxCLOC relevant changed lines (merge commits are already excluded via only_no_merge above) + relCLOC = utils.calculateRelCLOC(repo_path, commit, diff_exclude) # use this to filter commits by actually relevant changes + #print("relCLOC: ", relCLOC) + if relCLOC == 0 or (maxCLOC is not None and relCLOC > maxCLOC): + #print('Skip this commit: no relevant changes or too many relevant changed LOC') + count_skipped+=1 + continue + + # analyze + try_num = from_c + count_analyzed + count_failed + 1 + outtry = os.path.join(outdir, str(try_num)) + parent = gr.get_commit(commit.parents[0]) + #print('Analyze this commit incrementally. #', try_num) + + utils.reset_incremental_data(os.path.join(cwd, 'incremental_data')) + failed = True + try: + #print('Starting from parent', str(parent.hash), ".") + outparent = os.path.join(outtry, 'parent') + os.makedirs(outparent) + + default_options = ['-v'] + + add_options = default_options + ['--disable', 'incremental.load', '--enable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, parent.hash, outparent, conf_base, add_options, files) + + #print('And now analyze', str(commit.hash), 'from scratch.') + outchild_non_incr = os.path.join(outtry, 'child-non-incr') + os.makedirs(outchild_non_incr) + # Do not save incremental data in this run so as not to pollute the results + add_options = default_options + ['--disable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild_non_incr, conf_base, add_options, files) + + #print('And now analyze', str(commit.hash), 'incrementally.') + outchild = os.path.join(outtry, 'child') + os.makedirs(outchild) + add_options = default_options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild, conf_base, add_options, files) + + #print('And again incremental, this time with incremental postsolver') + outchild_incr_post = os.path.join(outtry, 'child-incr-post') + os.makedirs(outchild_incr_post) + add_options = default_options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild_incr_post, conf_incrpost, add_options, files) + + #print('And again incremental, this time with incremental postsolver and reluctant') + outchild_rel = 
os.path.join(outtry, 'child-rel') + os.makedirs(outchild_rel) + add_options = default_options + ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild_rel, conf_incrpost, add_options, files) + + count_analyzed+=1 + failed = False + except subprocess.CalledProcessError as e: + print('Aborted because command ', e.cmd, 'failed.') + count_failed+=1 + os.makedirs(outtry, exist_ok=True) + with open(os.path.join(outtry,'commit_properties.log'), "w+") as file: + json.dump({"hash": commit.hash, "parent_hash": parent.hash, "CLOC": commit.lines, "relCLOC": relCLOC, "failed": failed}, file) + analyzed_commits[try_num]=(str(commit.hash)[:6], relCLOC) + + num_commits = count_analyzed + count_skipped + count_failed + print("\nCommits traversed in total: ", num_commits) + print("Analyzed: ", count_analyzed) + print("Failed: ", count_failed) + print("Skipped: ", count_skipped) + +def add_version_with_cpu_suffix(strings): + string_with_cpu_suffix = list(map (lambda prefix : "CPU_" + prefix , strings)) + return strings + string_with_cpu_suffix + +def collect_data(outdir): + data = {"Commit": [], "Failed?": [], "Changed LOC": [], "Relevant changed LOC": [], "Changed/Added/Removed functions": [], "Change in number of race warnings": []} + + config_headers = [utils.header_parent, utils.header_non_incr_child, utils.header_incr_child, utils.header_incr_posts_child, utils.header_incr_posts_rel_child] + field_prefixes = [utils.runtime_prefix, utils.analysis_prefix, utils.solving_prefix] + field_indexes = ["runtime", "analysis_time", "solving_time"] + + field_prefixes = add_version_with_cpu_suffix(field_prefixes) + field_indexes = add_version_with_cpu_suffix(field_indexes) + + for prefix in field_prefixes: + for config in config_headers: + data[prefix + config] = [] + + if not os.path.exists(outdir): + return + + for t in os.listdir(outdir): + parent_log = os.path.join(outdir, t, 'parent', utils.analyzerlog) + child_non_incr_log = os.path.join(outdir, t, 'child-non-incr', utils.analyzerlog) + child_log = os.path.join(outdir, t, 'child', utils.analyzerlog) + child_posts_log = os.path.join(outdir, t, 'child-incr-post', utils.analyzerlog) + child_posts_rel_log = os.path.join(outdir, t, 'child-rel', utils.analyzerlog) + commit_prop_log = os.path.join(outdir, t, 'commit_properties.log') + t = int(t) + commit_prop = json.load(open(commit_prop_log, "r")) + data["Changed LOC"].append(commit_prop["CLOC"]) + data["Relevant changed LOC"].append(commit_prop["relCLOC"]) + data["Failed?"].append(commit_prop["failed"]) + data["Commit"].append(commit_prop["hash"][:7]) + + + if commit_prop["failed"] == True: + for field in range(field_indexes.__len__()): + header_prefix = field_prefixes[field] + field_index = field_indexes[field] + for config in range(config_headers.__len__()): + header = header_prefix + config_headers[config] + data[header].append(float(0)) + + data["Changed/Added/Removed functions"].append(0) + data["Change in number of race warnings"].append(0) + continue + + logs = [parent_log, child_non_incr_log, child_log, child_posts_log, child_posts_rel_log] + infos = list(map(utils.extract_from_analyzer_log, logs)) + + + child_incr_index = 2 + child_incr_info = infos[child_incr_index] + data["Changed/Added/Removed functions"].append(int(child_incr_info["changed"]) + int(child_incr_info["added"]) + int(child_incr_info["removed"])) + + for field in range(field_indexes.__len__()): + 
header_prefix = field_prefixes[field] + field_index = field_indexes[field] + for config in range(config_headers.__len__()): + header = header_prefix + config_headers[config] + info = infos[config] + data[header].append(float(info[field_index])) + + parent_index = 0 + parent_info = infos[parent_index] + + child_non_incr_index = 1 + child_non_incr_info = infos[child_non_incr_index] + + data["Change in number of race warnings"].append(int(child_non_incr_info["race_warnings"] - int(parent_info["race_warnings"]))) + return data + +def runperprocess(core, from_c, to_c): + if not only_collect_results: + psutil.Process().cpu_affinity([core]) + cwd = os.getcwd() + outdir = os.path.join(cwd, 'out') + if not only_collect_results: + if os.path.exists(outdir) and os.path.isdir(outdir): + shutil.rmtree(outdir) + analyze_small_commits_in_repo(cwd, outdir, from_c, to_c) + data_set = collect_data(outdir) + + df = pd.DataFrame(data_set) + #df.sort_index(inplace=True, key=lambda idx: idx.map(lambda x: int(x.split(":")[0]))) + print(df) + df.to_csv('results.csv', sep =';') + +def analyze_chunks_of_commits_in_parallel(): + processes = [] + + # calculate actual number of interesting commits up-front to allow for similar load distribution + iter = itertools.filterfalse(filter_commits_false_pred(os.path.join(os.getcwd(), repo_name)), Repository(url, since=begin, to=to, only_no_merge=True, clone_repo_to=os.getcwd()).traverse_commits()) + num_commits = sum(1 for _ in iter) + print("Number of potentially interesting commits:", num_commits) + perprocess = num_commits // numcores if num_commits % numcores == 0 else num_commits // numcores + 1 + print("Per process: " + str(perprocess)) + + for i in range(numcores): + dir = "process" + str(i) + if not only_collect_results: + os.mkdir(dir) + os.chdir(dir) + # run script + start = perprocess * i + end = perprocess * (i + 1) if i < numcores - 1 else num_commits + if not only_collect_results: + p = mp.Process(target=runperprocess, args=[coremapping[i], start, end]) + p.start() + processes.append(p) + # time.sleep(random.randint(5,60)) # add random delay between process creation to try to reduce interference + else: + runperprocess(coremapping[i], start, end) + os.chdir(result_dir) + + for p in processes: + p.join() + +def merge_results(): + filename = "results.csv" + frames = [] + for process_dir in os.listdir("."): + path = os.path.join(process_dir, filename) + if os.path.exists(path): + t = pd.read_csv(path, index_col=0, sep=";") + frames.append(t) + if len(frames) > 0: + df = pd.concat(frames) + #df.sort_index(inplace=True, key=lambda idx: idx.map(lambda x: int(x.split(":")[0]))) + df.to_csv('total_results.csv', sep=";") + + +if not only_collect_results: + os.mkdir(result_dir) +os.chdir(result_dir) + +analyze_chunks_of_commits_in_parallel() +merge_results() diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py new file mode 100644 index 000000000..ede93d8b7 --- /dev/null +++ b/scripts/incremental/benchmarking/plot.py @@ -0,0 +1,230 @@ +import utils +import os +import shutil + +description_non_incr = "(1)" +description_incr = "(2)" +description_incr_post = "(3)" +description_incr_rel ="(4)" + +def efficiency_bar_plot_all4(results_dir, result_csv_filename, figure_dir): + changed_loc_filter = lambda x : x >= 0 # no filtering + outfile_nonincr_vs_incr = "figure_bar.pgf" + df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) + + data_set = df[["Relevant 
changed LOC", utils.cpu_runtime_header_non_incr_child, utils.cpu_runtime_header_incr_child, utils.cpu_runtime_header_incr_posts_child, utils.cpu_runtime_header_incr_posts_rel_child]] + data_set = data_set.rename(columns={utils.cpu_runtime_header_non_incr_child: description_non_incr, utils.cpu_runtime_header_incr_child: description_incr, utils.cpu_runtime_header_incr_posts_child: description_incr_post, utils.cpu_runtime_header_incr_posts_rel_child: description_incr_rel}) + + colors = ["tab:olive", "tab:blue", "tab:orange", "tab:green", "tab:red"] + textwidth = 7 + size = (textwidth,textwidth/3) + + utils.barplot(data_set, figure_dir, outfile_nonincr_vs_incr, size, colors) + +def cummulative_distr_compare2(results_dir, suffix, changed_loc_filter, result_csv_filename, figure_dir): + num_bins = 2000 + outfile_nonincr_vs_incr = "figure_cum_distr_incr.pdf" + outfile_incr_vs_incrrel = "figure_cum_distr_rel.pdf" + df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) + + data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_non_incr_child, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_rel_child]) + datanonincr = {"values": data[0], "label": description_non_incr} + dataincr = {"values": data[1], "label": description_incr} + + utils.cummulative_distr_plot([datanonincr, dataincr], base, figure_dir, outfile_nonincr_vs_incr) + + data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_incr_child, utils.runtime_header_incr_posts_rel_child]) + dataincr = {"values": data[0], "label": description_incr} + datarelincr = {"values": data[1], "label": description_incr_rel} + + utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True) + + + +def cummulative_distr_all4_filter(results_dir, suffix, changed_loc_filter, result_csv_filename, figure_dir): + num_bins = 2000 + outfile_nonincr_vs_incr = "figure_cum_distr_all3"+ suffix + ".pdf" + df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) + + data, base = utils.create_cum_data(df, num_bins, [utils.cpu_runtime_header_non_incr_child, utils.cpu_runtime_header_incr_child, utils.cpu_runtime_header_incr_posts_child, utils.cpu_runtime_header_incr_posts_rel_child]) + data_non_incr = {"values": data[0], "label": description_non_incr} + data_incr = {"values": data[1], "label": description_incr} + data_incr_post = {"values": data[2], "label": description_incr_post} + data_incr_rel = {"values": data[3], "label": description_incr_rel} + utils.cummulative_distr_plot([data_non_incr, data_incr, data_incr_post, data_incr_rel], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) + +def cummulative_distr_all4(results_dir, results_csv_filenmane, figure_dir): + greater_50 = lambda x : x > 50 + cummulative_distr_all4_filter(results_dir, "_greater_50_loc_changed", greater_50, results_csv_filenmane, figure_dir) + + leq_50 = lambda x : x <= 50 + cummulative_distr_all4_filter(results_dir, "_leq_50_loc_changed", leq_50, results_csv_filenmane, figure_dir) + + +def distribution_absdiff_plot(title, changed_loc_filter, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): + df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) + + # plot incremental vs non-incremental + diff = df.loc[:,utils.runtime_header_non_incr_child] - 
df.loc[:,utils.runtime_header_incr_child] + utils.hist_plot(diff, 20, title, 'Improvement in s (incremental compared to non-incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_incr.pdf"), cutoffs_incr) + + # plot reluctant vs. basic incremental + diff = df.loc[:,utils.runtime_header_incr_child] - df.loc[:,utils.runtime_header_incr_posts_rel_child] + utils.hist_plot(diff, 2, title, 'Improvement in s (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_rel.pdf"), cutoffs_rel) + +def distribution_reldiff_plot(title, changed_loc_filter, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): + df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) + + # plot incremental vs non-incremental + print(df[utils.runtime_header_incr_child].astype('float')) + diff = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') + utils.hist_plot(diff, 0.01, title, "Relative Improvement in s (incremental compared to non-incremental)", 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_incr.pdf"), cutoffs_incr) + + # plot reluctant vs. basic incremental + diff = 1 - df.loc[:,utils.runtime_header_incr_posts_rel_child] / df.loc[:,utils.runtime_header_incr_child] + utils.hist_plot(diff, 0.005, title, 'Relative Improvement (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_rel.pdf"), cutoffs_rel) + +def paper_efficiency_graphs(dir_results, changed_loc_filter, csv_filename, outdir, filterRelCLOC=False, filterDetectedChanges=False): + df = utils.get_cleaned_filtered_data(os.path.join(dir_results,csv_filename), changed_loc_filter, filterRelCLOC=filterRelCLOC, filterDetectedChanges=filterDetectedChanges) + diff1 = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') + diff2 = 1 - df[utils.runtime_header_incr_posts_child].astype('float') / df[utils.runtime_header_incr_child].astype('float') + diff3 = 1 - df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_incr_posts_child].astype('float') + diff4 = 1 - df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') + step = 0.01 + for i, diff in enumerate([diff1,diff2,diff3,diff4]): + # output textwidth in latex with + # \usepackage{layouts} + # \printinunitsof{cm}\prntlen{\textwidth} + # \printinunitsof{in}\prntlen{\textwidth} + # -> 17.7917cm / 7.00697in + textwidth = 7 + xlimleft = None + xlimright = 1.05 + xlabel = "Relative speedup" if i==3 else None + ylabel = "\# Commits" if i==0 or i==3 else None + outfile = os.path.join(outdir, "efficiency_figure_" + str(i) + ".pgf") + if i == 0: + size = (textwidth/3+0.1, textwidth/4) # additional ylabel + elif i == 1: + xlimleft = -0.3 + size = (textwidth/3-0.1/2, textwidth/4) # missing ylabel + elif i == 3: + size = (textwidth, textwidth/4) + xlimright = 1.02 + step = 0.005 + else: + size = (textwidth/3-0.1/2, textwidth/4) # missing ylabel + utils.hist_plot(diff, step, None, xlabel, ylabel, outfile, + size, xlim_left=xlimleft, xlim_right=xlimright, cutoffs=None) + + # print statistics + for e in diff: + if (xlimleft and e < xlimleft) or (xlimright and e > xlimright): + print("excluded", e, "from efficiency figure", i) + diff1 = df[utils.runtime_header_incr_child].astype('float') / 
df[utils.runtime_header_non_incr_child].astype('float') + diff2 = df[utils.runtime_header_incr_posts_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') + diff3 = df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') + for n, diff in [("incr", diff1), ("+ incr postsolver", diff2), ("+ reluctant", diff3)]: + print("80% quantile for", n, "compared to from-scratch analysis:", diff.quantile(q=0.8) * 100, "%") + print("75% quantile for", n, "compared to from-scratch analysis:", diff.quantile(q=0.75) * 100, "%") + +def paper_precision_graph_box(results_precision, filename, outdir): + df = utils.get_data_from_json(os.path.join(results_precision, filename)) + + # Plot precision loss after x commits, where x is in {1, 2, 5, 10, 15} + lessprec1 = 'intermediate precision.1.precision.lessprec' + lessprec2 = 'intermediate precision.2.precision.lessprec' + lessprec5 = 'intermediate precision.5.precision.lessprec' + lessprec10 = 'intermediate precision.10.precision.lessprec' + lessprec15 = 'intermediate precision.15.precision.lessprec' + total1 = 'intermediate precision.1.precision.total' + total2 = 'intermediate precision.2.precision.total' + total5 = 'intermediate precision.5.precision.total' + total10 = 'intermediate precision.10.precision.total' + total15 = 'intermediate precision.15.precision.total' + + x = [1,2,5,10,15] + data = [] + lessprec = [lessprec1, lessprec2, lessprec5, lessprec10, lessprec15] + total = [total1, total2, total5, total10, total15] + for l, t in zip(lessprec, total): + ratio = df[l] / df[t] + data.append(ratio.dropna()) + + halftextwidth = 3.3 + size=(halftextwidth,halftextwidth*2/3) + utils.box_plot(data, x, "\# Commits", "Share of less precise program points", os.path.join(outdir, "precision_figure.pgf"), size) + + +def paper_precision_graph(results_precision, filename, outdir, suffix): + df = utils.get_data_from_json(os.path.join(results_precision, filename)) + + # Plot precision loss after x commits, where x is in {1, 2, 5, 10, 15} + lessprec1 = 'intermediate precision.1.precision.lessprec' + lessprec2 = 'intermediate precision.2.precision.lessprec' + lessprec5 = 'intermediate precision.5.precision.lessprec' + lessprec10 = 'intermediate precision.10.precision.lessprec' + lessprec15 = 'intermediate precision.15.precision.lessprec' + lessprecfinal = 'final precision.lessprec' + total1 = 'intermediate precision.1.precision.total' + total2 = 'intermediate precision.2.precision.total' + total5 = 'intermediate precision.5.precision.total' + total10 = 'intermediate precision.10.precision.total' + total15 = 'intermediate precision.15.precision.total' + totalfinal = 'final precision.total' + + data = [] + for i in range(len(df.index)): + x = [1,2,5,10,15,df.iloc[i]['length']] + vals = df.iloc[i][[lessprec1, lessprec2, lessprec5, lessprec10, lessprec15, lessprecfinal]].values + total = df.iloc[i][[total1, total2, total5, total10, total15, totalfinal]].values + x = [x[i] for i in range(len(x)) if vals[i] == vals[i]] + y = [vals[i] / total[i] for i in range(len(vals)) if vals[i] == vals[i] and total[i] == total[i]] + data.append((x,y)) + halftextwidth = 3.3 + size=(halftextwidth,halftextwidth*2/3) + outfile = os.path.join(outdir, "precision_figure_" + suffix + ".pgf") + utils.scatter_plot(data, "\# Commits", "Share of less precise program points", outfile, size) + + +def main(): + projects = ["figlet", "chrony", "zstd"] + results_efficiency = "result_efficiency_" + results_precision = 
"result_precision_" + + for project in projects: + efficiency_results = results_efficiency + project + precision_results = results_precision + project + + + if not (os.path.exists(efficiency_results) or os.path.exists(precision_results)): + print("Results for project " + project + " do not exist. Skipping.") + continue + else: + print("Creating plots for project " + project + ".") + + figures_dir = os.path.join("figures", project) + if os.path.exists(figures_dir): + shutil.rmtree(figures_dir) + os.makedirs(figures_dir) + + if os.path.exists(efficiency_results): + efficieny_filename = "total_results.csv" + print("Creating efficiency plots.") + # cummulative_distr_compare2(efficiency_results, efficieny_filename, figures_dir) + cummulative_distr_all4(efficiency_results, efficieny_filename, figures_dir) + efficiency_bar_plot_all4(efficiency_results, efficieny_filename, figures_dir) + # paper_efficiency_graphs(efficiency_results, efficieny_filename, figures_dir, filterRelCLOC=True, filterDetectedChanges=False) + else: + print("No efficiency results available.") + + # precision plot + if os.path.exists(precision_results): + for suffix in utils.compare_runs_suffixes: + precision_filename = utils.precision_result_file_name_with_suffix(suffix) + print("Creating precision plots for configuration:" + suffix) + paper_precision_graph(precision_results, precision_filename, figures_dir, suffix) + else: + print("No precision results available.") + +main() diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py new file mode 100644 index 000000000..cce567ebb --- /dev/null +++ b/scripts/incremental/benchmarking/precision.py @@ -0,0 +1,320 @@ +import utils +from pydriller import Repository, Git +import psutil +import os +import sys +from datetime import datetime +import json +import shutil +import pytz +import multiprocessing as mp + +# Some basic settings for the different projects (currently zstd, sqlite) +import projects + +################################################################################ +# Usage: python3 incremental_smallcommits.py +# Executing the script will overwrite the directory 'result_precision' in the cwd. +# The script for building the compilation database is assumed to be found in the analyzers script directory and the +# config file is assumed to be found in the conf directory of the analyzers repository. +usage = "Use script like this: python3 precision.py " +if len(sys.argv) != 4: + print("Wrong number of parameters.\n" + usage) + exit() + +# Load some project dependent settings: +project = projects.projects.get(sys.argv[2]) +if project == None: + print("Given Project " + project + " is not one of the supported projects. 
Add a new project by modifying projects.py.") + exit() + +url = project.url +repo_name = project.repo_name +build_compdb = project.build_compdb +cwd = os.getcwd() +conf = os.path.join(cwd, project.conf_base + ".json") +conf_incrpost = os.path.join(cwd, project.conf_incrpost + ".json") +begin = project.begin +to = project.to +diff_exclude = project.diff_exclude +files = project.files +branch = project.branch + +try: + numcores = int(sys.argv[3]) +except ValueError: + print("Parameter should be a number.\n" + usage) + exit() + +# Project independent settings +analyzer_dir = sys.argv[1] +res_dir = os.path.abspath('result_precision') +maxCLOC = None +only_collect_results = False # can be turned on to collect results, if data collection was aborted before the creation of result tables +################################################################################ + +utc = pytz.UTC +compare_commits = [1,2,5,10,15] +skipSeqShorterEq = 5 # minimum number of incremental commits in chain + +def start_commit_for_sequence_search(): + current_commit = "" + for commit in Repository(url, to=to, only_in_branch=branch, order='reverse', clone_repo_to=res_dir).traverse_commits(): + current_commit = commit + break + gr = Git(os.path.join(res_dir, repo_name)) + return current_commit, gr + +def find_sequences_rec(gr, commit, seq, seq_list, starting_points): + commit_date = commit.committer_date.replace(tzinfo=None) + if commit_date < begin: + if len(seq) > skipSeqShorterEq: + print("found seq of length: " + str(len(seq))) + seq_list.insert(0,seq) + elif commit.merge: + seq.insert(0,commit.hash) + if len(seq) > skipSeqShorterEq: + print("found seq of length: " + str(len(seq))) + seq_list.insert(0,seq) + for ph in commit.parents: + parent_commit = gr.get_commit(ph) + if ph not in starting_points: + starting_points.insert(0,ph) + find_sequences_rec(gr, parent_commit, [], seq_list, starting_points) + else: + seq.insert(0,commit.hash) + for p in commit.parents: + parent_commit = gr.get_commit(p) + find_sequences_rec(gr, parent_commit, seq, seq_list, starting_points) + +def find_sequences(): + seq_list = [] + starting_points=[] + start_commit, gr = start_commit_for_sequence_search() + starting_points.insert(0,start_commit.hash) + find_sequences_rec(gr, start_commit, [], seq_list, starting_points) + seq_list.sort(key=len, reverse=True) + print("summary") + total = 0 + maxlen = max(map(lambda x : len(x), seq_list)) + for i in range(0,maxlen + 1): + c = sum(map(lambda x : len(x) == i, seq_list)) + total += c + print("length " + str(i) + ": " + str(c)) + print("total: " + str(len(seq_list))) + assert(total == len(seq_list)) + print("avg len: " + str(sum(map(lambda x : len(x), seq_list))/len(list(map(lambda x : len(x), seq_list))))) + with open('sequences.json', 'w') as outfile: + json.dump(seq_list, outfile, indent=4) + return seq_list + +# returns the file where the incremental results are stored for comparison +def incremental_analyze(commit, out_commit, out_dir_name, incremental_dir, compare_data_file, gr, repo_path, conf, add_options): + # analyze commit incrementally based on the previous commit and save run for comparison + # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') + out_incr = os.path.join(out_commit, out_dir_name) + os.makedirs(out_incr) + file_incremental_run = os.path.join(out_incr, compare_data_file) + add_options = add_options + ['--set','incremental.load-dir', incremental_dir, '--set','incremental.save-dir', incremental_dir, '--enable', 'incremental.load', '--enable', 
'incremental.save', '--set', 'save_run', file_incremental_run] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) + return file_incremental_run + +def analyze_series_in_repo(series): + prev_commit = "" + commit_num = 0 + repo_path = os.path.abspath(repo_name) + out_dir = os.path.abspath('out') + + incremental_data = "incremental_data_" + incr_data_dir = os.path.abspath(incremental_data + "incr") + incr_post_data_dir = os.path.abspath(incremental_data + "incr_post") + incr_post_rel_data_dir = os.path.abspath(incremental_data + "incr_post_rel") + + + with open('sequence.json', 'w') as outfile: + json.dump(series, outfile, indent=4) + dummy_c_file = "file.c" + with open(dummy_c_file, 'w') as file: + file.write("int main() { return 0; }") + file.close() + + for commit in Repository(url, since=begin, only_commits=series, clone_repo_to=os.getcwd()).traverse_commits(): + gr = Git(repo_path) + + # print("\n" + commit.hash) + # print('changed LOC: ', commit.lines) + # print('merge commit: ', commit.merge) + + # check that the given series is a path of sequential commits in the repository + msg = "Commit " + prev_commit[:7] + " is not a parent commit of " + commit.hash[:7] + " (parents: " + ','.join(commit.parents) + ")" + assert (prev_commit == "" or prev_commit in commit.parents), msg + + relCLOC = utils.calculateRelCLOC(repo_path, commit, diff_exclude) + + # analyze + out_commit = os.path.join(out_dir, str(commit_num)) + os.makedirs(out_commit) + with open(os.path.join(out_commit,'commit_properties.log'), "w+") as file: + json.dump({"hash": commit.hash, "parent_hash": prev_commit, "CLOC": commit.lines, "relCLOC": relCLOC}, file) + + if commit_num == 0: + # analyze initial commit non-incrementally + try: + # print('Analyze ', str(commit.hash), ' as initial commit.') + add_options = ['--disable', 'incremental.load', '--enable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_commit, conf, add_options, files) + prev_commit = commit.hash + except utils.subprocess.CalledProcessError as e: + print('Aborted initial analysis because command ', e.cmd, 'failed.') + print('Fix the problem or choose a different commit to start the incremental analysis from.') + exit() + else: + # analyze every following commit based on the latest previous commit for which the analysis succeeded + try: + if os.path.isdir("backup_incremental_data"): + shutil.rmtree("backup_incremental_data") + shutil.copytree("incremental_data", "backup_incremental_data") + + # compare only for the commits listed in compare_commits and for the last run + if commit_num in compare_commits or commit_num == len(series) - 1: + # analyze commit non-incrementally and save run for comparison + # print('Analyze', str(commit.hash), 'non-incrementally (#', commit_num, ').') + out_nonincr = os.path.join(out_commit, 'non-incr') + os.makedirs(out_nonincr) + file_original_run = os.path.join(out_nonincr, "compare-data-nonincr") + add_options = ['--enable', 'incremental.only-rename', '--set', 'save_run', file_original_run] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_nonincr, conf, add_options, files) + + # analyze commit incrementally based on the previous commit and save run for comparison + # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') + + file_incr_run = incremental_analyze(commit, out_commit, 'incr', incr_data_dir, "compare-data-incr", gr, repo_path, conf, []) + file_incr_post_run = incremental_analyze(commit, out_commit, 
'incr-post', incr_post_data_dir, "compare-data-incr-post", gr, repo_path, conf_incrpost, []) + reluctant_option = ['--enable', 'incremental.reluctant.enabled'] + file_incr_rel_post_run = incremental_analyze(commit, out_commit, 'incr-post-rel', incr_post_rel_data_dir, "compare-data-incr-post-rel", gr, repo_path, conf_incrpost, reluctant_option) + + if commit_num in compare_commits or commit_num == len(series) - 1: + # compare stored data of original and incremental run + # print('Compare both runs.') + out_compare = os.path.join(out_commit, 'compare') + os.makedirs(out_compare) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, utils.compare_runs_suffixes[0], conf, file_incr_run, file_original_run) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, utils.compare_runs_suffixes[1], conf, file_incr_post_run, file_original_run) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, utils.compare_runs_suffixes[2], conf, file_incr_rel_post_run, file_original_run) + + + except utils.subprocess.CalledProcessError as e: + print('Aborted because command ', e.cmd, 'failed.') + shutil.rmtree("incremental_data") + shutil.copytree("backup_incremental_data", "incremental_data") + + prev_commit = commit.hash + commit_num += 1 + +def runperprocess(core, seq_list, q): + psutil.Process().cpu_affinity([core]) + while not q.empty(): + i = q.get() + serie = seq_list[i] + dir = "series" + str(i) + os.mkdir(dir) + os.chdir(dir) + analyze_series_in_repo(serie) + os.chdir(res_dir) + +def analyze_seq_in_parallel(seq_list): + avail_phys_cores = psutil.cpu_count(logical=False) + allowedcores = avail_phys_cores - 1 + if numcores > allowedcores: + print("Not enough physical cores on this machine (exist: ", avail_phys_cores, " allowed: ", allowedcores, ")") + exit() + # For equal load distribution, choose a process-to-core mapping, + # use only physical cores and have an equal number of processes per cache. + # The layout of physical/logical cores and sharing of caches is machine dependent. To find out use: 'lscpu --all --extended'. 
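+ # Worked example (illustrative assumption, not part of the measured setup: 8 physical cores, numcores = 4): + # the mapping built below is [0, 1] + [4, 5] = [0, 1, 4, 5], i.e. half of the worker processes run on each half of the physical cores.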
+ # For our test server: + coremapping = [i for i in range(numcores - numcores//2)] + [i for i in range(avail_phys_cores//2, avail_phys_cores//2 + numcores//2)] + processes = [] + + # set up a queue with one task per series + q = mp.Queue() + for i in range(len(seq_list)): + q.put(i) + + for j in range(numcores): + # start process for analysing series on core j + c = coremapping[j] + p = mp.Process(target=runperprocess, args=[c, seq_list.copy(), q]) + p.start() + processes.append(p) + for p in processes: + p.join() + + +def merge_results(outfilename, suffix): + wd = os.getcwd() + seq_summaries = [] + result_sums = {str(i): {"precpertotal": {"equal": 0, "moreprec": 0, "lessprec": 0, "incomp": 0, "total": 0}, "number_of_commits": 0, "relCLOC": 0} for i in compare_commits} + num_seq = 0 + for s in map(lambda x: os.path.abspath(x), os.listdir(wd)): + if not os.path.isdir(s) or os.path.basename(s)[:6] != "series": + continue + num_seq += 1 + os.chdir(s) + with open('sequence.json', 'r') as file: + seq = json.load(file) + # lookup comparison results + outdir = os.path.join(s, "out") + commits = os.listdir(outdir) + commits.sort(key = lambda x: int(x)) + int_prec = {str(i): {"precision": None, "relCLOC": None} for i in compare_commits} + final_prec = None + relCLOC = 0 + + comparelog = utils.comparelog_with_suffix(suffix) + for i in filter(lambda x: x != "0", commits): + ith_dir = os.path.join(outdir, i) + compare_log_path = os.path.join(ith_dir, "compare", comparelog) + with open(os.path.join(outdir, i, "commit_properties.log"), "r") as f: + relCLOC += json.load(f)["relCLOC"] + if int(i) in compare_commits: + if os.path.isdir(ith_dir) and os.path.exists(compare_log_path): + int_prec[i]["precision"] = utils.extract_precision_from_compare_log(compare_log_path) + int_prec[i]["relCLOC"] = relCLOC + if int_prec[i]["precision"]: + result_sums[i]["precpertotal"] = {k: result_sums[i]["precpertotal"].get(k, 0) + (int_prec[i]["precision"].get(k, 0) / int_prec[i]["precision"]["total"]) for k in set(result_sums[i]["precpertotal"])} + result_sums[i]["number_of_commits"] += 1 + result_sums[i]["relCLOC"] += relCLOC + if int(i) != 0 and int(i) == len(commits) - 1: + if os.path.exists(compare_log_path): + final_prec = utils.extract_precision_from_compare_log(compare_log_path) + summary = {"name": os.path.basename(s), "sequence": seq, "length": len(seq), "intermediate precision": int_prec, "final precision": final_prec, "finalRelCLOC": relCLOC} + seq_summaries.append(summary) + os.chdir(wd) + result_avgs = {i: None for i in result_sums.keys()} + for i, ps in result_sums.items(): + if ps["number_of_commits"] != 0: + avg_prec = {k: ps["precpertotal"].get(k,0) / ps["number_of_commits"] for k in set(ps["precpertotal"])} + result_avgs[i] = {"precpertotal_avg": avg_prec, "relCLOC_avg": ps["relCLOC"] / ps["number_of_commits"]} + res = {"seq_summary": seq_summaries, "prec_avgs": result_avgs} + with open(outfilename, "w") as f: + json.dump(res, f, indent=4) + +def merge_all_results(): + print("\nmerge results") + for suffix in utils.compare_runs_suffixes: + results_filename = utils.precision_result_file_name_with_suffix(suffix) + merge_results(results_filename, suffix) + +if not only_collect_results: + os.mkdir(res_dir) +os.chdir(res_dir) + +if not only_collect_results: + print("find sequences to analyze") + seq_list = find_sequences() + + print("\nanalyze sequences in parallel") + analyze_seq_in_parallel(seq_list) + +merge_all_results() diff --git a/scripts/incremental/benchmarking/projects.py 
b/scripts/incremental/benchmarking/projects.py new file mode 100644 index 000000000..ba2125cd5 --- /dev/null +++ b/scripts/incremental/benchmarking/projects.py @@ -0,0 +1,83 @@ +import os +import datetime + +class ProjectConfig: + url: str + repo_name: str + build_compdb: str + conf_base: str + conf_incrpost: str + begin: datetime.datetime + to: datetime.datetime + diff_exclude: list # list[str] + files: list # list[str] + '''Files to analyze. If this list is not empty, the given files will be analyzed (not those in the compiledb)''' + branch: str + + def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin, to, diff_exclude, files, branch): + self.url = url + self.repo_name = repo_name + self.build_compdb = build_compdb + self.conf_base = conf_base + self.conf_incrpost = conf_incrpost + self.begin = begin + self.to = to + self.diff_exclude = diff_exclude + self.files = files + self.branch = branch + +sqlite = ProjectConfig( + url = "https://github.com/sqlite/sqlite", + repo_name = "sqlite", + build_compdb = "../build/build_compdb_sqlite.sh", + conf_base = os.path.join("conf", "sqlite-minimal"), + conf_incrpost = os.path.join("conf", "sqlite-minimal-incrpostsolver"), + begin = datetime.datetime(2021,8,1), + to = datetime.datetime(2021,8,10), # minimal subset: datetime(2021,8,4) + diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], + files = ['sqlite3.c', 'sqlite3.h', 'sqlite3ext.h', 'shell.c'], + branch = "master" +) + +zstd = ProjectConfig( + url = "https://github.com/facebook/zstd", + repo_name = "zstd", + build_compdb = "../build/build_compdb_zstd.sh", + conf_base = os.path.join("conf", "zstd-race"), # very minimal: "zstd-minimal" + conf_incrpost = os.path.join("conf", "zstd-race-incrpostsolver"), + begin = datetime.datetime(2021,8,1), + to = datetime.datetime(2022,2,1), # minimal subset: datetime(2021,8,4) + diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], + files = None, + branch = "dev" +) + +figlet = ProjectConfig( + url = "https://github.com/cmatsuoka/figlet", + repo_name = "figlet", + build_compdb = None, + conf_base = os.path.join("conf", "figlet"), + conf_incrpost = os.path.join("conf", "figlet-incrpostsolver"), + begin = datetime.datetime(2010,1,1), + to = datetime.datetime(2022,10,10), + diff_exclude = [], + files = ['Makefile'], + branch = "master" +) + +chrony = ProjectConfig( + # The official repo is at https://git.tuxfamily.org/chrony/chrony.git, + # but it does not allow multiple parallel clones, so we use the mirror on GitHub. 
+ url="https://github.com/mlichvar/chrony.git", + repo_name="chrony", + build_compdb="../build/build_compdb_chrony.sh", + conf_base=os.path.join("conf", "chrony"), + conf_incrpost=os.path.join("conf", "chrony-incrpostsolver"), + begin=datetime.datetime(2020, 1, 1), + to=datetime.datetime(2022, 10, 10), + diff_exclude=[], + files=None, + branch = "master" +) + +projects = {"sqlite": sqlite, "chrony": chrony, "figlet": figlet, "zstd": zstd} diff --git a/scripts/incremental/benchmarking/requirements.txt b/scripts/incremental/benchmarking/requirements.txt new file mode 100644 index 000000000..29e5c8733 --- /dev/null +++ b/scripts/incremental/benchmarking/requirements.txt @@ -0,0 +1,8 @@ +brokenaxes==0.5.0 +matplotlib==3.5.1 +numpy>=1.19.5 +pandas==1.4.1 +psutil==5.9.0 +PyDriller==2.1 +pytz==2021.1 +compiledb>=0.10.1 diff --git a/scripts/incremental/benchmarking/run_efficiency.sh b/scripts/incremental/benchmarking/run_efficiency.sh new file mode 100755 index 000000000..e6a284312 --- /dev/null +++ b/scripts/incremental/benchmarking/run_efficiency.sh @@ -0,0 +1,17 @@ +#!/bin/bash +ANALYZER_DIR=$1 + +# Number of cores to be used +NCORES=$2 + +echo "Starting run on figlet" +python3 efficiency.py $ANALYZER_DIR figlet $NCORES +mv result_efficiency result_efficiency_figlet + +echo "Starting run on chrony" +python3 efficiency.py $ANALYZER_DIR chrony $NCORES +mv result_efficiency result_efficiency_chrony + +echo "Starting run on zstd" +python3 efficiency.py $ANALYZER_DIR zstd $NCORES +mv result_efficiency result_efficiency_zstd diff --git a/scripts/incremental/benchmarking/run_precision.sh b/scripts/incremental/benchmarking/run_precision.sh new file mode 100755 index 000000000..c0f8032ee --- /dev/null +++ b/scripts/incremental/benchmarking/run_precision.sh @@ -0,0 +1,13 @@ +#!/bin/bash +ANALYZER_DIR=$1 + +# Number of cores to be used +NCORES=$2 + +# echo "Starting run on figlet" +# python3 precision.py $ANALYZER_DIR figlet $NCORES +# mv result_precision result_precision_figlet + +echo "Starting run on zstd" +python3 precision.py $ANALYZER_DIR zstd $NCORES +mv result_precision result_precision_zstd diff --git a/scripts/incremental/benchmarking/stats.py b/scripts/incremental/benchmarking/stats.py new file mode 100644 index 000000000..3a2909ec3 --- /dev/null +++ b/scripts/incremental/benchmarking/stats.py @@ -0,0 +1,65 @@ +import utils +from pydriller import Repository +from datetime import datetime +import os +import sys + +if __name__ == '__main__': + if len(sys.argv) != 2: + print("Wrong number of parameters.\nUse script like this: python3 stats.py <analyzer_dir>") + exit() + +analyzer_dir = sys.argv[1] +url = 'https://github.com/facebook/zstd' +repo_name = 'zstd' +begin = datetime(2021,8,1) +to = datetime(2022,2,1) +maxCLOC = 50 +dirs_to_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] + +cwd = os.getcwd() +outdir = os.path.join(cwd, 'out') +repo_path = os.path.normpath(os.path.join(cwd, repo_name)) +paths_to_exclude = list(map(lambda x: os.path.join(repo_path, x), dirs_to_exclude)) + +analyzed_commits = {} +total_commits = 0 +count_nochanges = 0 +count_merge = 0 +count_big = 0 +count_small = 0 + +def iter_repo(): + global analyzed_commits + global total_commits + global count_merge + global count_nochanges + global count_big + global count_small + + for commit in Repository(url, since=begin, to=to, clone_repo_to=cwd).traverse_commits(): + total_commits += 1 + + # count merge commits + if commit.merge: + count_merge += 1 + continue + + # count commits that have less than maxCLOC of 
relevant code changes
+        relCLOC = utils.calculateRelCLOC(repo_path, commit, paths_to_exclude) # use this to filter commits by actually relevant changes
+        if relCLOC == 0:
+            count_nochanges += 1
+            continue
+
+        if maxCLOC is not None and relCLOC > maxCLOC:
+            count_big += 1
+            continue
+
+        count_small += 1
+
+iter_repo()
+print("\nCommits traversed in total: ", total_commits)
+print("Merge commits: ", count_merge)
+print("Commits without any relevant changes: ", count_nochanges)
+print("Big commits: ", count_big)
+print("Small commits with relevant changes: ", count_small)
diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py
new file mode 100644
index 000000000..cb29a652d
--- /dev/null
+++ b/scripts/incremental/benchmarking/utils.py
@@ -0,0 +1,360 @@
+import os
+import sys
+import math
+import shutil
+from pathlib import Path
+import subprocess
+from pydriller import Git
+import re
+import pandas
+import json
+import numpy as np
+import brokenaxes
+import matplotlib as mpl
+import matplotlib.ticker as mticker
+
+mpl.use("pgf")
+mpl.rcParams.update({
+    "pgf.texsystem": "pdflatex",
+    'pgf.rcfonts': False,
+    'text.usetex': True,
+    'font.family': 'serif',
+    'font.size': 6,
+    'axes.titlesize': 6,
+    'legend.fontsize': 6,
+    'figure.titlesize': 7,
+    'figure.dpi': 300,
+    'xtick.labelsize': 6,
+    'ytick.labelsize': 6,
+})
+import matplotlib.pyplot as plt
+from matplotlib.ticker import ScalarFormatter
+
+runtime_prefix = "Runtime"
+analysis_prefix = "Analysis"
+solving_prefix = "Solving"
+
+header_parent = " for parent commit (non-incremental)"
+header_non_incr_child = " for commit (non-incremental)"
+header_incr_child = " for commit (incremental)"
+header_incr_posts_child = " for commit (incremental + incr postsolver)"
+header_incr_posts_rel_child = " for commit (incremental + incr postsolver + reluctant)"
+
+runtime_header_parent = runtime_prefix + header_parent
+runtime_header_non_incr_child = runtime_prefix + header_non_incr_child
+runtime_header_incr_child = runtime_prefix + header_incr_child
+runtime_header_incr_posts_child = runtime_prefix + header_incr_posts_child
+runtime_header_incr_posts_rel_child = runtime_prefix + header_incr_posts_rel_child
+
+cpu_prefix = "CPU_"
+
+cpu_runtime_header_parent = cpu_prefix + runtime_header_parent
+cpu_runtime_header_non_incr_child = cpu_prefix + runtime_header_non_incr_child
+cpu_runtime_header_incr_child = cpu_prefix + runtime_header_incr_child
+cpu_runtime_header_incr_posts_child = cpu_prefix + runtime_header_incr_posts_child
+cpu_runtime_header_incr_posts_rel_child = cpu_prefix + runtime_header_incr_posts_rel_child
+
+analysis_header_parent = analysis_prefix + header_parent
+analysis_header_non_incr_child = analysis_prefix + header_non_incr_child
+analysis_header_incr_child = analysis_prefix + header_incr_child
+analysis_header_incr_posts_child = analysis_prefix + header_incr_posts_child
+analysis_header_incr_posts_rel_child = analysis_prefix + header_incr_posts_rel_child
+
+solving_header_parent = solving_prefix + header_parent
+solving_header_non_incr_child = solving_prefix + header_non_incr_child
+solving_header_incr_child = solving_prefix + header_incr_child
+solving_header_incr_posts_child = solving_prefix + header_incr_posts_child
+solving_header_incr_posts_rel_child = solving_prefix + header_incr_posts_rel_child
+
+preparelog = "prepare.log"
+analyzerlog = "analyzer.log"
+compare_runs_suffixes = ["incr", "incr_post", "incr_rel_post"]
+
+def comparelog_with_suffix(suffix):
+    return "compare_" + suffix + ".log"
+
+def reset_incremental_data(incr_data_dir):
+    if os.path.exists(incr_data_dir) and os.path.isdir(incr_data_dir):
+        shutil.rmtree(incr_data_dir)
+
+def analyze_commit(analyzer_dir, gr: Git, repo_path, build_compdb, commit_hash, outdir, conf, extra_options, files):
+    gr.checkout(commit_hash)
+    conf_path = conf
+
+    # Create the analyze command
+    file_list = []
+    if files:
+        def append_to_repo_path(file):
+            return os.path.join(repo_path, file)
+        file_list = list(map(append_to_repo_path, files))
+
+    analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', conf_path, *file_list, *extra_options]
+    # If the list of files was empty, we pass the repo_path to goblint
+    if not files:
+        analyze_command.append(repo_path)
+
+    # Print the configuration and the analyze command
+    with open(outdir + '/config.out', "a+") as file:
+        with open(conf_path, "r") as c:
+            file.write("config: " + c.read())
+        file.write("\n")
+        file.write("added options:\n")
+        for o in extra_options:
+            file.write(o + " ")
+        file.write("\n\n")
+
+        file.write("analyze command:\n")
+        for c in analyze_command:
+            file.write(c + " ")
+        file.write("\n\n")
+
+        file.write("Commit hash:\n" + commit_hash + "\n")
+
+    script_path = os.path.abspath(os.path.dirname(__file__))
+
+    # Prepare the repo
+    if build_compdb is not None:
+        prepare_command = ['sh', os.path.join(script_path, build_compdb)]
+        with open(os.path.join(outdir, preparelog), "w+") as outfile:
+            subprocess.run(prepare_command, cwd=gr.path, check=True, stdout=outfile, stderr=subprocess.STDOUT)
+
+    # Run the analysis
+    with open(os.path.join(outdir, analyzerlog), "w+") as outfile:
+        subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT)
+
+def compare_runs(analyzer_dir, dummy_c_file, outdir, log_suffix, conf, compare_data_1, compare_data_2):
+    options = ['--conf', conf, '--disable', 'warn.warning', '--disable', 'warn.race', '--disable', 'dbg.compare_runs.diff', '--disable', 'dbg.compare_runs.eqsys', '--enable', 'dbg.compare_runs.node', '--compare_runs', compare_data_1, compare_data_2]
+    analyze_command = [os.path.join(analyzer_dir, 'goblint'), *options, dummy_c_file]
+    with open(os.path.join(outdir, comparelog_with_suffix(log_suffix)), "w+") as outfile:
+        subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT)
+
+def calculateRelCLOC(repo_path, commit, diff_exclude):
+    diff_exclude = list(map(lambda x: os.path.join(repo_path, x), diff_exclude))
+    relcloc = 0
+    for f in commit.modified_files:
+        _, extension = os.path.splitext(f.filename)
+        if not (extension == ".h" or extension == ".c"):
+            continue
+        filepath = f.new_path
+        if filepath is None:
+            filepath = f.old_path
+        parents = Path(filepath).parents
+        parents = list(map(lambda x: os.path.join(repo_path, x), parents))
+        if any(dir in parents for dir in diff_exclude):
+            continue
+        relcloc = relcloc + f.added_lines + f.deleted_lines
+    return relcloc
+
+def find_line(pattern, log):
+    with open(log, 'r') as file:
+        for line in file:
+            m = re.search(pattern, line)
+            if m:
+                return m.groupdict()
+    return None
+
+def extract_from_analyzer_log(log):
+    # First comes the CPU time (which is ignored); we look at the walltime.
+    runtime_pattern = 'Default[ ]+(?P<cpu_time>[0-9\.]+)s[ ]+(?P<runtime>[0-9\.]+)s'
+    analysis_time_pattern = 'analysis[ ]+(?P<analysis_cpu_time>[0-9\.]+)s[ ]+(?P<analysis_time>[0-9\.]+)s'
+    solving_time_pattern = 'solving[ ]+(?P<solving_cpu_time>[0-9\.]+)s[ ]+(?P<solving_time>[0-9\.]+)s'
+    change_info_pattern = 'change_info = { unchanged = (?P<unchanged>[0-9]*); changed = (?P<changed>[0-9]*); added = (?P<added>[0-9]*); removed = (?P<removed>[0-9]*) }'
+
+    runtime = find_line(runtime_pattern, log)
+    analysis_time = find_line(analysis_time_pattern, log)
+    solving_time = find_line(solving_time_pattern, log)
+    ch = find_line(change_info_pattern, log) or {"unchanged": 0, "changed": 0, "added": 0, "removed": 0}
+    d = dict(list(runtime.items()) + list(analysis_time.items()) + list(solving_time.items()) + list(ch.items()))
+    with open(log, "r") as file:
+        num_racewarnings = file.read().count('[Warning][Race]')
+        d["race_warnings"] = num_racewarnings
+    return d
+
+def extract_precision_from_compare_log(log):
+    pattern = "equal: (?P<equal>[0-9]+), more precise: (?P<more_precise>[0-9]+), less precise: (?P<less_precise>[0-9]+), incomparable: (?P<incomparable>[0-9]+), total: (?P<total>[0-9]+)"
+    precision = find_line(pattern, log)
+    return {k: int(v) for k, v in precision.items()} if precision else None
+
+def precision_result_file_name_with_suffix(suffix):
+    return "results_" + suffix + ".json"
+
+def barplot(df, figure_dir, outfile, figsize=None, colors=None):
+    df.plot.bar(rot=0, width=0.7, figsize=figsize, color=colors)
+    plt.xticks(rotation=45, ha='right', rotation_mode='anchor')
+    plt.xlabel('Commit')
+    plt.tight_layout()
+
+    outfile = os.path.join(figure_dir, outfile)
+    plt.savefig(outfile)
+
+def get_cleaned_filtered_data(result_csv_file, changed_loc_filter, filterRelCLOC=False, filterDetectedChanges=False):
+    df = pandas.read_csv(result_csv_file, index_col='Commit', sep=";")
+    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
+
+    # Clean the dataset: remove all rows for which any of the runtime entries is 0,
+    # which means that the respective analysis run failed.
+    df = df[(df[runtime_header_parent] != 0)]
+    df = df[changed_loc_filter(df["Relevant changed LOC"])]
+    if filterRelCLOC:
+        df = df[df["Relevant changed LOC"] > 0]
+    if filterDetectedChanges:
+        df = df[df["Changed/Added/Removed functions"] > 0]
+    return df
+
+def get_data_from_json(result_file):
+    with open(result_file) as f:
+        d = json.load(f)
+    df = pandas.json_normalize(d['seq_summary'])
+    return df
+
+def create_cum_data(dataFrame, num_bins, relColumns):
+    min = dataFrame[relColumns].min().min()
+    max = dataFrame[relColumns].max().max()
+    bins = np.linspace(min, max, num=num_bins + 1)
+    data = []
+    base = []
+    for c in relColumns:
+        valuesc, basec = np.histogram(dataFrame.loc[:, c], bins=bins)
+        base = basec
+        cum = np.cumsum(valuesc, dtype=float)
+        cum[cum == 0] = np.nan
+
+        # If there is a tail of values that are all the same, set the entries after its first
+        # occurrence to NaN. In the resulting graph, this avoids the artefact that all lines
+        # go up to the largest y-value of any line.
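+        # Worked example (hypothetical values): cum = [1, 4, 7, 9, 9, 9] becomes
+        # [1, 4, 7, 9, nan, nan]; the first entry that reaches the final value 9
+        # is kept, and the repeated tail entries are blanked out.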
+        last = len(cum) - 1
+        last_value = cum[last]
+        for i in range(last - 1, 0, -1):
+            if cum[i] == last_value:
+                cum[i + 1] = np.nan
+            else:
+                break
+
+        data = data + [cum]
+    return data, base[:-1]
+
+def largest_power_of_two_smaller(x):
+    p = math.floor(math.log2(x)) - 1
+    p = max(1, p)
+    return 2 ** p
+
+def cummulative_distr_plot(data_sets, base, figure_dir, outfile, figsize=None, title=None, logscale=False):
+    if figsize:
+        plt.figure(figsize=figsize)
+    else:
+        plt.figure()
+    min = sys.maxsize
+
+    linestyle_tuple = [
+        "solid",
+        "--",
+        (0, (10, 1)),      # long dash
+        (0, (3, 1, 1, 1))  # dash dots
+    ]
+    for d in data_sets:
+        min_d = d["values"].min()
+        if min_d < min:
+            min = min_d
+        plt.plot(d["values"], base, linestyle=linestyle_tuple.pop(0), label=d["label"])
+    plt.xlabel('Number of Commits')
+    if logscale:
+        plt.ylabel('Runtime in s ($log_{2}$ scale)')
+        plt.yscale('log', base=2)
+        plt.gca().yaxis.set_major_formatter(ScalarFormatter())
+        plt.xlim(left=0)
+        plt.ylim(bottom=largest_power_of_two_smaller(min))
+        # plt.yticks(np.arange(100, 1500, 100))
+    else:
+        plt.ylabel('Runtime in s')
+    plt.tight_layout()
+    plt.legend()
+    plt.title(title)
+
+    outfile = os.path.join(figure_dir, outfile)
+    plt.savefig(outfile)
+
+def hist_plot(data, step, title, xlabel, ylabel, outfile, size, xlim_left=None, xlim_right=None, cutoffs=None):
+    min = data.min()
+    max = data.max()
+    min = min // step
+    max = max // step + 1
+    bins = np.arange(min * step, (max + 1) * step, step)
+
+    if cutoffs:
+        plt.figure()
+        bax = brokenaxes.brokenaxes(ylims=cutoffs, hspace=0.05, left=0.18, bottom=0.16)
+        bax.hist(data, bins, histtype='bar')
+        plt.xlabel(xlabel, labelpad=0)
+        plt.ylabel(ylabel, labelpad=0)
+        if title: plt.title(title)
+        plt.savefig(outfile, bbox_inches='tight')
+    else:
+        fig = plt.figure()
+        width, height = size
+        fig.set_size_inches(w=width, h=height)
+        plt.hist(data, bins)
+        if xlim_left:
+            plt.xlim(left=xlim_left, right=xlim_right)
+        else:
+            plt.xlim(right=xlim_right)
+        if xlabel: plt.xlabel(xlabel)
+        if ylabel: plt.ylabel(ylabel)
+        if title: plt.title(title)
+        plt.tight_layout(pad=0.4)
+        plt.savefig(outfile)
+
+def hist_subplots(ax, data, step):
+    min = data.min()
+    max = data.max()
+    min = min // step
+    max = max // step + 1
+    bins = np.arange(min * step, (max + 1) * step, step)
+    ax.hist(data, bins)
+
+def four_hist_subplots(data, title, xlabel, ylabel, outfile):
+    step = 0.01
+    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, tight_layout=True)
+    for i, ax in enumerate([ax1, ax2, ax3, ax4]):
+        hist_subplots(ax, data, step)
+        ax.title.set_text(title[i])
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+    plt.tight_layout()
+    fig.savefig(outfile)
+
+def scatter_plot(data, xlabel, ylabel, outfile, size):
+    fig = plt.figure()
+    width, height = size
+    fig.set_size_inches(w=width, h=height)
+    colors = ['red', 'azure', 'blue', 'brown', 'chartreuse', 'chocolate', 'darkblue', 'darkgreen', 'seagreen', 'green', 'indigo', 'orangered', 'orange', 'coral', 'olive', 'mediumseagreen', 'grey', 'teal']
+    # markers = ['x', '+', 'o', 's', 'p', '*', 'D', 'd', 'v', '^', '<', '>', '1', '2', '3', '4', 'H', 'P']
+    linestyles = ['dashed']
+    for i, (x, y) in enumerate(data):
+        plt.plot(x, y, marker='x', linewidth=0.4, markersize=1, alpha=0.85, color=colors[i % len(colors)], linestyle=linestyles[i % len(linestyles)])
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+    plt.gca().xaxis.set_major_locator(mticker.MultipleLocator(1))
+    # plt.xticks([1, 2, 5, 10, 15])
+    plt.ylim(bottom=-0.005, top=0.3)
+    plt.tight_layout(pad=0.4)
+    plt.savefig(outfile)
+
+def box_plot(data, x, xlabel, ylabel, outfile, size):
+    fig = plt.figure()
+    width, height = size
+    fig.set_size_inches(w=width, h=height)
+    plt.boxplot(data, flierprops=dict(markersize=3), positions=x)
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+    plt.tight_layout(pad=0.4)
+    plt.savefig(outfile)
+    print(outfile)
diff --git a/scripts/incremental/build/build_compdb_chrony.sh b/scripts/incremental/build/build_compdb_chrony.sh
new file mode 100755
index 000000000..f222e093a
--- /dev/null
+++ b/scripts/incremental/build/build_compdb_chrony.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+git clean -fdx
+./configure
+make -j 1 chronyd | tee build.log
+compiledb --parse build.log
+# ./configure && bear -- make chronyd
+sed -i -E 's/#define NTP_ERA_SPLIT \([0-9]+LL/#define NTP_ERA_SPLIT \(1671796396LL/' config.h
diff --git a/scripts/incremental/build/build_compdb_figlet.sh b/scripts/incremental/build/build_compdb_figlet.sh
new file mode 100755
index 000000000..7dc7672cc
--- /dev/null
+++ b/scripts/incremental/build/build_compdb_figlet.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+# do nothing
diff --git a/scripts/incremental/build/build_compdb_sqlite.sh b/scripts/incremental/build/build_compdb_sqlite.sh
new file mode 100755
index 000000000..65ef7ce04
--- /dev/null
+++ b/scripts/incremental/build/build_compdb_sqlite.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+# sed -i 's/zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT).*/zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) -DZSTD_NO_INTRINSICS/' programs/Makefile
+sh configure
+LC_ALL=C.UTF-8 compiledb make -j 1 sqlite3.lo
diff --git a/scripts/incremental/build/build_compdb_zstd.sh b/scripts/incremental/build/build_compdb_zstd.sh
new file mode 100755
index 000000000..baedce33c
--- /dev/null
+++ b/scripts/incremental/build/build_compdb_zstd.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+sed -i 's/zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT).*/zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) -DZSTD_NO_INTRINSICS/' programs/Makefile
+LC_ALL=C.UTF-8 compiledb make -j 1 zstd
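
A minimal sketch of how the utils.py helpers above might be driven downstream (hypothetical: the input file "results.csv", the output name "cum_runtimes.pgf", and the choice of columns are illustrative assumptions, not part of this patch):

    import utils

    # Load the benchmark results, dropping failed runs (runtime 0) and
    # commits without relevant changed LOC.
    df = utils.get_cleaned_filtered_data("results.csv", lambda loc: loc > 0, filterRelCLOC=True)

    # Build cumulative histograms over two of the runtime columns defined in utils.py.
    columns = [utils.runtime_header_parent, utils.runtime_header_incr_child]
    data, base = utils.create_cum_data(df, 100, columns)

    # One line per column: x = number of commits, y = runtime bin edges.
    data_sets = [{"values": values, "label": label} for values, label in zip(data, columns)]
    utils.cummulative_distr_plot(data_sets, base, ".", "cum_runtimes.pgf")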