From 24ea564805c235d371388f63711e77ec44113605 Mon Sep 17 00:00:00 2001
From: Rafael Mota
Date: Mon, 23 Dec 2019 17:25:36 -0300
Subject: [PATCH 1/6] Refactor fetch_jars script

---
 scripts/fetch_jars.py | 220 +++++++++++++++++++++++++-----------------
 1 file changed, 130 insertions(+), 90 deletions(-)

diff --git a/scripts/fetch_jars.py b/scripts/fetch_jars.py
index 074d093c7..c8bf4c7a9 100644
--- a/scripts/fetch_jars.py
+++ b/scripts/fetch_jars.py
@@ -4,106 +4,128 @@
 import subprocess
 import time
 import shutil
-import os
+from os import path
+from os import listdir
+import csv
 
 PATH = "path"
 NAME = "name"
+BRANCH = "branch"
+STATE = "state"
+
+MERGE_COMMIT = "merge commit"
 RESULT = "result"
 
 GITHUB_API= "https://api.github.com"
 TRAVIS_API = "https://api.travis-ci.org"
 
 LOGIN = "login"
-DOWNLOAD_URL='browser_download_url'
+BROWSER_DOWNLOAD_URL='browser_download_url'
 ASSETS="assets"
 
 MESSAGE_PREFIX="Trigger build #"
 RELEASE_PREFIX= "fetchjar-"
+FINISHED = "finished"
+UNTAGGED = "untagged"
 
-inputPath = sys.argv[1] # input path passed as cli argument
-outputPath = sys.argv[2] # output path passed as cli argument
+input_path = sys.argv[1] # input path passed as cli argument
+output_path = sys.argv[2].rstrip("/") # output path passed as cli argument
 token = sys.argv[3] # token passed as cli argument
 
-def fetchJars(inputPath, outputPath, token):
+def fetch_jars(input_path, output_path, token):
     # this method reads a csv input file, with the projects name and path
     # for each project it downloads the build generated via github releases
     # and moves the builds to the output generated by the framework
-
     print("Starting build collection")
 
-    tokenUser = get_github_user(token)[LOGIN]
+    token_user_name = get_github_user(token)[LOGIN]
 
-    parsedInput = read_input(inputPath)
-    parsedOutput = read_output(outputPath)
-    newResultsFile = []
+    parsed_input = read_csv(input_path, ",")
+    parsed_output = read_csv(output_path + "/data/results.csv", ";")
+
+    parsed_output_hash = output_to_hash(parsed_output)
 
-    for project in parsedInput:
+    new_results_file = []
 
-        splitedProjectPath = project[PATH].split('/')
-        projectName = splitedProjectPath[len(splitedProjectPath) - 1]
-        githubProject = tokenUser + '/' + projectName
-        print (projectName)
+    for project in parsed_input:
 
-        get_builds_and_wait(githubProject)
+        splited_project_path = project[PATH].split('/')
+        project_name = splited_project_path[len(splited_project_path) - 1]
+        github_project = token_user_name + '/' + project_name
+        print (project_name)
 
-        releases = get_github_releases(token, githubProject)
+        get_builds_and_wait(github_project)
 
+        releases = get_github_releases(token, github_project)
         # download the releases for the project moving them to the output directories
        for release in releases:
             # check if release was generated by the framework
             if (release[NAME].startswith(RELEASE_PREFIX)):
-                commitSHA = release[NAME].replace(RELEASE_PREFIX, '')
-                print ("Downloading " + commitSHA )
+                commit_sha = strip_release_prefix(release)
+
+                # preparing and downloading build
+                print ("Scenario: " + commit_sha)
+
                 try:
-                    downloadPath = mount_download_path(outputPath, project, commitSHA)
-                    downloadUrl = release[ASSETS][0][DOWNLOAD_URL]
-                    download_file(downloadUrl, downloadPath)
-                    if (commitSHA in parsedOutput):
-                        newResultsFile.append(parsedOutput[commitSHA])
-                    untar_and_remove_file(downloadPath)
-                    print (downloadPath + ' is ready')
-                except:
-                    pass
+                    download_build(output_path, project, commit_sha, release)
+
+                    new_results_file.append(parsed_output_hash[commit_sha])
-        remove_commit_files_without_builds (outputPath, projectName)
+                    print ("Scenario is ready")
+                except Exception as e:
+                    print ("Error downloading scenario: " + str(e))
+
+        remove_commit_files_without_builds (output_path, project_name)
 
-    with open(outputPath + "/data/results-with-builds.csv", 'w') as outputFile:
-        outputFile.write("project;merge commit;className;method;left modifications;left deletions;right modifications;right deletions\n")
-        outputFile.write("\n".join(newResultsFile))
-        outputFile.close()
-
-def read_output(outputPath):
-    fo = open(outputPath + "/data/results.csv")
-    file = fo.read()
-    fo.close()
-
-    fileOutLines = file.split("\n")
-    return parse_output(fileOutLines)
-
-def parse_output(lines):
-    result = {}
-    for line in lines[1:]:
-        cells = line.split(";")
-        if (len (cells) > 1):
-            result[cells[1]] = line
-    return result
-
-def read_input(inputPath):
-    f = open(inputPath, "r")
-    file = f.read()
-    f.close()
-
-    bruteLines = file.split("\n")
-    return parse_input(bruteLines)
-
-def parse_input(lines):
-    # parse framework input csv file
-    result = []
-    for line in lines[1:]:
-        cells = line.split(",")
-        if (len (cells) > 1):
-            method = {}
-            method[NAME] = cells[0]
-            method[PATH] = cells[1]
-            result.append(method)
-    return result
+    save_results_with_builds(output_path, new_results_file)
+
+def download_build(output_path, project, commit_sha, release):
+    print ("Downloading")
+    scenario_path = mount_scenario_path(output_path, project, commit_sha)
+
+    if path.exists(scenario_path):
+        tar_path = scenario_path + "result.tar.gz"
+
+        build_path = scenario_path + "build"
+
+        if path.exists(tar_path):
+            raise Exception(tar_path + " already exists")
+
+        if path.exists(build_path):
+            raise Exception(build_path + " already exists")
+
+        download_url = get_download_url(release)
+
+        download_file(download_url, tar_path)
+
+        untar_and_remove_file(tar_path, build_path)
+    else:
+        raise Exception("Scenario folder: " + scenario_path + " doesn't exist")
+
+def output_to_hash(parsed_output):
+    parsed_output_hash = {}
+
+    for scenario in parsed_output:
+        parsed_output_hash[scenario[MERGE_COMMIT]] = scenario
+
+    return parsed_output_hash
+
+def strip_release_prefix(release):
+    return release[NAME].replace(RELEASE_PREFIX, '')
+
+def get_download_url(release):
+    return release[ASSETS][0][BROWSER_DOWNLOAD_URL]
+
+def save_results_with_builds(output_path, new_results_file):
+    with open(output_path + "/data/results-with-builds.csv", 'w') as outputFile:
+        csv_writer = csv.DictWriter(outputFile, delimiter=";",
+            fieldnames=["project","merge commit","className","method","left modifications","left deletions","right modifications","right deletions"])
+
+        csv_writer.writeheader()
+        for scenario in new_results_file:
+            csv_writer.writerow(scenario)
+
+
+def read_csv(file_path, delimiter):
+    with open(file_path, "r") as input_lines:
+        return list(csv.DictReader(input_lines, delimiter=delimiter))
 
 def download_file(url, target_path):
     # download file from url
@@ -111,28 +133,29 @@ def download_file(url, target_path):
     if response.status_code == 200:
         with open(target_path, 'wb') as f:
             f.write(response.raw.read())
+    else:
+        raise Exception("Download request returned status code: " + str(response.status_code))
 
-def mount_download_path(outputPath, project, commitSHA):
+def mount_scenario_path(output_path, project, commit_sha):
     # mount path where the downloaded build will be moved to
-    return outputPath + '/files/' + project[NAME] + '/' + commitSHA + '/result.tar.gz'
commit_sha + "/" -def untar_and_remove_file(downloadPath): - downloadDir = downloadPath.replace('result.tar.gz', '') - subprocess.call(['mkdir', downloadDir + 'build']) - subprocess.call(['tar', '-xf', downloadPath, '-C', downloadDir + '/build', ]) - subprocess.call(['rm', downloadPath]) +def untar_and_remove_file(tar_path, output_path): + subprocess.call(['mkdir', output_path]) + subprocess.call(['tar', '-xf', tar_path, '-C', output_path ]) + subprocess.call(['rm', tar_path]) def get_builds_and_wait(project): has_pendent = True filtered_builds = [] while (has_pendent): builds = get_travis_project_builds(project) - filtered_builds = filter (lambda x: not x["branch"].startswith("untagged"), builds) + filtered_builds = filter (lambda x: not x[BRANCH].startswith(UNTAGGED), builds) has_pendent = False for build in filtered_builds: - print (build["state"]) - has_pendent = has_pendent or (build["state"] != "finished") + print (build[BRANCH] + " status: " + build[STATE] ) + has_pendent = has_pendent or (build[STATE] != FINISHED) if (has_pendent): print ("Waiting 30 seconds") @@ -142,34 +165,51 @@ def get_builds_and_wait(project): def get_travis_project_builds(project): - return requests.get(TRAVIS_API + '/repos/' + project + '/builds').json() + try: + res = requests.get(TRAVIS_API + '/repos/' + project + '/builds') + res.raise_for_status() + return res.json() + except Exception as e: + raise Exception("Error getting travis builds: " + str(e)) + def get_github_user(token): - return requests.get(GITHUB_API + '/user', headers=get_headers(token)).json() + try: + res = requests.get(GITHUB_API + '/user', headers=get_headers(token)) + res.raise_for_status() + + return res.json() + except Exception as e: + raise Exception("Error getting github user: " + str(e)) def get_github_releases(token, project): - return requests.get(GITHUB_API + '/repos/' + project + '/releases', headers=get_headers(token)).json() + try: + res = requests.get(GITHUB_API + '/repos/' + project + '/releases', headers=get_headers(token)) + res.raise_for_status() + + return res.json() + except Exception as e: + raise Exception("Error getting github releases: " + str(e)) def get_headers(token): return { "Authorization": "token " + token } +def remove_commit_files_without_builds (output_path, project_name): + files_path = output_path + "/files/" + project_name + "/" -def remove_commit_files_without_builds (outputPath, projectName): - files_path = outputPath + "/files/" + projectName + "/" - - if (os.path.exists(files_path)): - commit_dirs = os.listdir(files_path) + if (path.exists(files_path)): + commit_dirs = listdir(files_path) for directory in commit_dirs: commit_dir = files_path + directory build_dir = commit_dir + "/build" - if (not os.path.exists(build_dir)): + if (not path.exists(build_dir)): shutil.rmtree(commit_dir) - if (len (os.listdir(files_path)) == 0 ): + if (len (listdir(files_path)) == 0 ): shutil.rmtree(files_path) -fetchJars(inputPath, outputPath, token) \ No newline at end of file +fetch_jars(input_path, output_path, token) \ No newline at end of file From c14279321c62b86f44a6249a8816813cf59e3497 Mon Sep 17 00:00:00 2001 From: Rafael Mota Date: Mon, 23 Dec 2019 17:33:58 -0300 Subject: [PATCH 2/6] Add try catch in project level --- scripts/fetch_jars.py | 53 +++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/scripts/fetch_jars.py b/scripts/fetch_jars.py index c8bf4c7a9..a6e0f3b1d 100644 --- a/scripts/fetch_jars.py +++ b/scripts/fetch_jars.py @@ -4,8 +4,7 @@ import 
@@ -4,8 +4,7 @@
 import subprocess
 import time
 import shutil
-from os import path
-from os import listdir
+from os import path, listdir
 import csv
 
 PATH = "path"
@@ -45,34 +44,37 @@ def fetch_jars(input_path, output_path, token):
     new_results_file = []
 
     for project in parsed_input:
-
         splited_project_path = project[PATH].split('/')
         project_name = splited_project_path[len(splited_project_path) - 1]
         github_project = token_user_name + '/' + project_name
-        print (project_name)
+
+        try:
+            print (project_name)
 
-        get_builds_and_wait(github_project)
+            get_builds_and_wait(github_project)
 
-        releases = get_github_releases(token, github_project)
-        # download the releases for the project moving them to the output directories
-        for release in releases:
-            # check if release was generated by the framework
-            if (release[NAME].startswith(RELEASE_PREFIX)):
-                commit_sha = strip_release_prefix(release)
+            releases = get_github_releases(token, github_project)
+            # download the releases for the project moving them to the output directories
+            for release in releases:
+                # check if release was generated by the framework
+                if (release[NAME].startswith(RELEASE_PREFIX)):
+                    commit_sha = strip_release_prefix(release)
 
-                # preparing and downloading build
-                print ("Scenario: " + commit_sha)
+                    # preparing and downloading build
+                    print ("Scenario: " + commit_sha)
 
-                try:
-                    download_build(output_path, project, commit_sha, release)
-
-                    new_results_file.append(parsed_output_hash[commit_sha])
-
-                    print ("Scenario is ready")
-                except Exception as e:
-                    print ("Error downloading scenario: " + str(e))
+                    try:
+                        download_build(output_path, project, commit_sha, release)
+
+                        new_results_file.append(parsed_output_hash[commit_sha])
+
+                        print ("Scenario is ready")
+                    except Exception as e:
+                        print ("Error downloading scenario: " + str(e))
 
-        remove_commit_files_without_builds (output_path, project_name)
+            remove_commit_files_without_builds (output_path, project_name)
+        except Exception as e:
+            print ("Error fetching builds for project " + project_name + ": " + str(e))
 
     save_results_with_builds(output_path, new_results_file)
 
@@ -165,8 +167,9 @@ def get_builds_and_wait(project):
 
 
 def get_travis_project_builds(project):
+    res = requests.get(TRAVIS_API + '/repos/' + project + '/builds')
+
     try:
-        res = requests.get(TRAVIS_API + '/repos/' + project + '/builds')
         res.raise_for_status()
 
         return res.json()
@@ -174,8 +177,8 @@ def get_travis_project_builds(project):
         raise Exception("Error getting travis builds: " + str(e))
 
 def get_github_user(token):
+    res = requests.get(GITHUB_API + '/user', headers=get_headers(token))
     try:
-        res = requests.get(GITHUB_API + '/user', headers=get_headers(token))
         res.raise_for_status()
 
         return res.json()
@@ -183,8 +186,8 @@ def get_github_user(token):
         raise Exception("Error getting github user: " + str(e))
 
 def get_github_releases(token, project):
+    res = requests.get(GITHUB_API + '/repos/' + project + '/releases', headers=get_headers(token))
     try:
-        res = requests.get(GITHUB_API + '/repos/' + project + '/releases', headers=get_headers(token))
         res.raise_for_status()
 
         return res.json()

From c01e2927ef54944755deb97354519f552f251aaf Mon Sep 17 00:00:00 2001
From: Rafael Mota
Date: Mon, 23 Dec 2019 20:03:05 -0300
Subject: [PATCH 3/6] Add more error handling

---
 scripts/fetch_jars.py | 97 +++++++++++++++++++++++--------------------
 1 file changed, 52 insertions(+), 45 deletions(-)

diff --git a/scripts/fetch_jars.py b/scripts/fetch_jars.py
index a6e0f3b1d..b3012518d 100644
--- a/scripts/fetch_jars.py
+++ b/scripts/fetch_jars.py
@@ -34,53 +34,60 @@ def fetch_jars(input_path, output_path, token):
     # and moves the builds to the output generated by the framework
     print("Starting build collection")
 
-    token_user_name = get_github_user(token)[LOGIN]
-
-    parsed_input = read_csv(input_path, ",")
-    parsed_output = read_csv(output_path + "/data/results.csv", ";")
-
-    parsed_output_hash = output_to_hash(parsed_output)
-
-    new_results_file = []
+    try:
+        token_user_name = get_github_user(token)[LOGIN]
 
-    for project in parsed_input:
-        splited_project_path = project[PATH].split('/')
-        project_name = splited_project_path[len(splited_project_path) - 1]
-        github_project = token_user_name + '/' + project_name
+        parsed_input = read_csv(input_path, ",")
+        parsed_output = read_csv(output_path + "/data/results.csv", ";")
 
-        try:
-            print (project_name)
-
-            get_builds_and_wait(github_project)
-
-            releases = get_github_releases(token, github_project)
-            # download the releases for the project moving them to the output directories
-            for release in releases:
-                # check if release was generated by the framework
-                if (release[NAME].startswith(RELEASE_PREFIX)):
-                    commit_sha = strip_release_prefix(release)
-
-                    # preparing and downloading build
-                    print ("Scenario: " + commit_sha)
-
-                    try:
-                        download_build(output_path, project, commit_sha, release)
-
-                        new_results_file.append(parsed_output_hash[commit_sha])
+        parsed_output_hash = output_to_hash(parsed_output)
+
+        new_results_file = []
+
+        for project in parsed_input:
+            splited_project_path = project[PATH].split('/')
+            project_name = splited_project_path[len(splited_project_path) - 1]
+            github_project = token_user_name + '/' + project_name
+
+            # check if framework used optional custom name
+            if project[NAME]:
+                project_name = project[NAME]
+
+            try:
+                print (project_name)
+
+                get_builds_and_wait(github_project)
+
+                releases = get_github_releases(token, github_project)
+                # download the releases for the project moving them to the output directories
+                for release in releases:
+                    # check if release was generated by the framework
+                    if (release[NAME].startswith(RELEASE_PREFIX)):
+                        commit_sha = strip_release_prefix(release)
+
+                        # preparing and downloading build
+                        print ("Scenario: " + commit_sha)
+
+                        try:
+                            download_build(output_path, project, commit_sha, release)
+
+                            new_results_file.append(parsed_output_hash[commit_sha])
+
+                            print ("Scenario is ready")
+                        except Exception as e:
+                            print ("Error downloading scenario: " + str(e))
+
+                remove_commit_files_without_builds (output_path, project_name)
+            except Exception as e:
+                print ("Error fetching builds for project " + project_name + ": " + str(e))
 
-                        print ("Scenario is ready")
-                    except Exception as e:
-                        print ("Error downloading scenario: " + str(e))
-
-            remove_commit_files_without_builds (output_path, project_name)
-        except Exception as e:
-            print ("Error fetching builds for project " + project_name + ": " + str(e))
-
-    save_results_with_builds(output_path, new_results_file)
+        save_results_with_builds(output_path, new_results_file)
+    except Exception as e:
+        print (e)
 
-def download_build(output_path, project, commit_sha, release):
+def download_build(output_path, project_name, commit_sha, release):
     print ("Downloading")
-    scenario_path = mount_scenario_path(output_path, project, commit_sha)
+    scenario_path = mount_scenario_path(output_path, project_name, commit_sha)
 
     if path.exists(scenario_path):
         tar_path = scenario_path + "result.tar.gz"
@@ -110,7 +117,7 @@ def output_to_hash(parsed_output):
     return parsed_output_hash
 
 def strip_release_prefix(release):
-    return release[NAME].replace(RELEASE_PREFIX, '')
+    return release[NAME].replace(RELEASE_PREFIX, "")
 
 def get_download_url(release):
     return release[ASSETS][0][BROWSER_DOWNLOAD_URL]
@@ -138,9 +145,9 @@ def download_file(url, target_path):
     else:
         raise Exception("Download request returned status code: " + str(response.status_code))
 
-def mount_scenario_path(output_path, project, commit_sha):
+def mount_scenario_path(output_path, project_name, commit_sha):
     # mount path where the downloaded build will be moved to
-    return output_path + '/files/' + project[NAME] + '/' + commit_sha + "/"
+    return output_path + '/files/' + project_name + '/' + commit_sha + "/"
 
 def untar_and_remove_file(tar_path, output_path):
     subprocess.call(['mkdir', output_path])

From 13c865ef86bdd70377f8cc749eb9d1a0a99aa674 Mon Sep 17 00:00:00 2001
From: Rafael Mota
Date: Wed, 25 Dec 2019 20:52:16 -0300
Subject: [PATCH 4/6] Modify fetch_jars script to support projects sharing a
 repository under different names

---
 scripts/fetch_jars.py | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/scripts/fetch_jars.py b/scripts/fetch_jars.py
index 12541aff3..40fb97340 100644
--- a/scripts/fetch_jars.py
+++ b/scripts/fetch_jars.py
@@ -12,6 +12,8 @@
 PATH = "path"
 NAME = "name"
 
+FORK_URL = "fork_url"
+
 BRANCH = "branch"
 STATE = "state"
@@ -43,19 +45,14 @@ def fetch_jars(input_path, output_path, token):
 
         parsed_input = read_csv(input_path, ",")
         parsed_output = read_csv(output_path + "/data/results.csv", ";")
 
+        projects = map(lambda p: process_project(p, token_user_name), parsed_input)
         parsed_output_hash = output_to_hash(parsed_output)
-        print (parsed_output_hash)
 
         new_results_file = []
 
-        for project in parsed_input:
-            splited_project_path = project[PATH].split('/')
-            project_name = splited_project_path[len(splited_project_path) - 1]
-            github_project = token_user_name + '/' + project_name
-
-            # check if framework used optional custom name
-            if project[NAME]:
-                project_name = project[NAME]
+        for project in projects:
+            github_project = project[FORK_URL]
+            project_name = project[NAME]
 
             try:
                 print (project_name)
@@ -75,7 +72,7 @@ def fetch_jars(input_path, output_path, token):
                         try:
                             download_build(output_path, project_name, commit_sha, release)
 
-                            new_results_file.append(parsed_output_hash[commit_sha])
+                            new_results_file.append(parsed_output_hash[project_name + commit_sha])
 
                             print ("Scenario is ready")
                         except Exception as e:
@@ -89,6 +86,20 @@ def fetch_jars(input_path, output_path, token):
     except Exception as e:
         print (e)
 
+def process_project(data, token_user_name):
+    project = {}
+
+    splited_project_path = data[PATH].split('/')
+    github_project_name = splited_project_path[len(splited_project_path) - 1]
+    github_project = token_user_name + '/' + github_project_name
+
+    project[PATH] = data[PATH]
+    project[FORK_URL] = github_project
+    # check if framework used optional custom name
+    project[NAME] = data[NAME] if data[NAME] else github_project_name
+
+    return project
+
 def download_build(output_path, project_name, commit_sha, release):
     print ("Downloading")
     scenario_path = mount_scenario_path(output_path, project_name, commit_sha)
@@ -116,7 +127,7 @@ def output_to_hash(parsed_output):
     parsed_output_hash = {}
 
     for scenario in parsed_output:
-        parsed_output_hash[scenario[MERGE_COMMIT]] = scenario
+        parsed_output_hash[scenario[PROJECT] + scenario[MERGE_COMMIT]] = scenario
 
     return parsed_output_hash

From 58bc3c3210410d98a878f8b2436a2a6234a48c76 Mon Sep 17 00:00:00 2001
From: Rafael Mota
Date: Wed, 25 Dec 2019 21:05:19 -0300
Subject: [PATCH 5/6] Treat "already exists" errors in a better way

---
 scripts/fetch_jars.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/scripts/fetch_jars.py b/scripts/fetch_jars.py
index 40fb97340..d36729194 100644
--- a/scripts/fetch_jars.py
+++ b/scripts/fetch_jars.py
@@ -33,6 +33,13 @@
 output_path = sys.argv[2].rstrip("/") # output path passed as cli argument
 token = sys.argv[3] # token passed as cli argument
 
+class AlreadyExistsException(Exception):
+    def __init__(self, file_path):
+        self.file_path = file_path
+        self.message = file_path + " already exists"
+    def __str__(self):
+        return self.message
+
 def fetch_jars(input_path, output_path, token):
     # this method reads a csv input file, with the projects name and path
     # for each project it downloads the build generated via github releases
@@ -75,6 +82,10 @@ def fetch_jars(input_path, output_path, token):
                             new_results_file.append(parsed_output_hash[project_name + commit_sha])
 
                             print ("Scenario is ready")
+                        except AlreadyExistsException as ae:
+                            new_results_file.append(parsed_output_hash[project_name + commit_sha])
+
+                            print (ae)
                         except Exception as e:
                             print ("Error downloading scenario: " + str(e))
 
@@ -110,10 +121,10 @@ def download_build(output_path, project_name, commit_sha, release):
         build_path = scenario_path + "build"
 
         if path.exists(tar_path):
-            raise Exception(tar_path + " already exists")
+            raise AlreadyExistsException(tar_path)
 
         if path.exists(build_path):
-            raise Exception(build_path + " already exists")
+            raise AlreadyExistsException(build_path)
 
         download_url = get_download_url(release)

From ba45ed62bb2f8c513bf5b9db03f1eb60107f8d23 Mon Sep 17 00:00:00 2001
From: Rafael Mota
Date: Wed, 25 Dec 2019 22:28:49 -0300
Subject: [PATCH 6/6] Refactor and make parse_to_soot script more robust

---
 scripts/parse_to_soot.py | 94 ++++++++++++++++++----------------------
 1 file changed, 41 insertions(+), 53 deletions(-)

diff --git a/scripts/parse_to_soot.py b/scripts/parse_to_soot.py
index 6255a4edd..d8b832696 100644
--- a/scripts/parse_to_soot.py
+++ b/scripts/parse_to_soot.py
@@ -1,71 +1,59 @@
 # This script receives as input the path to a directory generated by the miningframework; it reads the output files and creates a [output]/data/results-soot.csv with the output in a format supported by the Soot analysis framework
 import sys
+from csv import DictReader, writer
 
-CLASS_NAME = "class_name"
-LEFT_MODIFICATION = "leftModification"
-RIGHT_MODIFICATION = "rightModfication"
-COMMIT_SHA = "commitSha"
-PROJECT_NAME = "projectName"
+CLASS_NAME = "className"
+LEFT_MODIFICATIONS = "left modifications"
+RIGHT_MODIFICATIONS = "right modifications"
+COMMIT_SHA = "merge commit"
+PROJECT_NAME = "project"
 
-output_path = sys.argv[1] # get output path passed as cli argument
+output_path = sys.argv[1].rstrip("/") # get output path passed as cli argument
 
 def export_csv():
-    f = open(output_path + "/data/results.csv", "r")
-    file = f.read()
-    f.close()
+    print ("Running parse to soot")
+    scenarios = read_output(output_path)
+
+    for scenario in scenarios:
+        base_path = get_scenario_base_path(scenario)
 
-    brute_lines = file.split("\n")
+        left_modifications = parse_modifications(scenario[LEFT_MODIFICATIONS])
+        right_modifications = parse_modifications(scenario[RIGHT_MODIFICATIONS])
+        class_name = scenario[CLASS_NAME]
 
-    parsed = parse_output(brute_lines)
-    csv = generate_csv(parsed)
+        result = []
+        result_reverse = []
+
+        for line in left_modifications:
+            if line not in right_modifications:
+                result.append([class_name, "sink", line])
+                result_reverse.append([class_name, "source", line])
+
+        for line in right_modifications:
+            if line not in left_modifications:
+                result.append([class_name, "source", line])
+                result_reverse.append([class_name, "sink", line])
+
+        if result:
+            with open(base_path + "/soot.csv", "w") as soot, open(base_path + "/soot-reverse.csv", "w") as soot_reverse:
+                soot_writer = writer(soot, delimiter=",")
+                soot_reverse_writer = writer(soot_reverse, delimiter=",")
 
-def parse_output(lines):
-    result = []
-    for line in lines[1:]:
-        cells = line.split(";")
-        if (len (cells) > 1):
-            method = {}
-            method[PROJECT_NAME] = cells[0]
-            method[COMMIT_SHA] = cells[1]
-            method[CLASS_NAME] = cells[2]
-            method[LEFT_MODIFICATION] = parse_modification(cells[4])
-            method[RIGHT_MODIFICATION] = parse_modification(cells[6])
-            result.append(method)
-    return result
+
+                soot_writer.writerows(result)
+                soot_reverse_writer.writerows(result_reverse)
 
-def parse_modification(modifications):
+def read_output(output_path):
+    with open(output_path + "/data/results-with-builds.csv", "r") as output_file:
+        return list(DictReader(output_file, delimiter=";"))
+
+def parse_modifications(modifications):
     trimmed_input = modifications.strip("[]").replace(" ", "")
     if (len (trimmed_input) > 0):
         return trimmed_input.split(",")
     return []
 
-def generate_csv(collection):
-    for elem in collection:
-        result = []
-        result_reverse = []
-        class_name = elem[CLASS_NAME]
-        left_modifications = elem[LEFT_MODIFICATION]
-        right_modifications = elem[RIGHT_MODIFICATION]
-        for l in left_modifications:
-            if l not in right_modifications:
-                result_reverse.append(class_name + ",sink," + l)
-                result.append(class_name + ",source," + l)
-        for r in right_modifications:
-            if r not in left_modifications:
-                result_reverse.append(class_name + ",source," + r)
-                result.append(class_name + ",sink," + r)
-        try:
-            if result:
-                base_path = output_path + "/files/" + elem[PROJECT_NAME] + "/" + elem[COMMIT_SHA]
-                save_file(base_path + "/soot.csv", result)
-                save_file(base_path + "/soot-reverse.csv", result_reverse)
-        except:
-            pass
-
-def save_file(filePath, result):
-    csv_file = open(filePath, "w")
-    csv_file.write("\n".join(result))
-    csv_file.close()
+def get_scenario_base_path(scenario):
+    return output_path + "/files/" + scenario[PROJECT_NAME] + "/" + scenario[COMMIT_SHA]
 
 export_csv()
\ No newline at end of file
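
Usage sketch (a minimal illustration based on the sys.argv handling in the patched scripts; "projects.csv", "output", and the token value are placeholder names, not part of the patches):

    # fetch_jars.py <input csv> <framework output dir> <github token>
    # the input csv is read with a "," delimiter and "name"/"path" columns;
    # from patch 3 onward an empty "name" falls back to the last path segment.
    # The output dir must already contain the data/results.csv produced by the
    # miningframework, which is read with ";" and the header:
    # project;merge commit;className;method;left modifications;left deletions;right modifications;right deletions
    python scripts/fetch_jars.py projects.csv output <github-token>

    # parse_to_soot.py <framework output dir>
    # reads data/results-with-builds.csv and writes soot.csv / soot-reverse.csv
    # next to each downloaded build under output/files/<project>/<merge commit>/
    python scripts/parse_to_soot.py output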