
Commit

Merge pull request #6 from spgroup/master

Bring changes from master

Rafael Mota Alves authored Dec 27, 2019
2 parents ed89d50 + 7599837 commit bb5ab40
Showing 2 changed files with 214 additions and 153 deletions.
273 changes: 173 additions & 100 deletions scripts/fetch_jars.py
@@ -7,135 +7,190 @@
 import subprocess
 import time
 import shutil
-import os
+from os import path, listdir
+import csv

 PATH = "path"
 NAME = "name"
+FORK_URL = "fork_url"
+
+BRANCH = "branch"
+STATE = "state"
+
+PROJECT = "project"
+MERGE_COMMIT = "merge commit"
+RESULT = "result"
 GITHUB_API= "https://api.github.com"
 TRAVIS_API = "https://api.travis-ci.org"
 LOGIN = "login"
-DOWNLOAD_URL='browser_download_url'
+BROWSER_DOWNLOAD_URL='browser_download_url'
 ASSETS="assets"
 MESSAGE_PREFIX="Trigger build #"
 RELEASE_PREFIX= "fetchjar-"

+FINISHED = "finished"
+UNTAGGED = "untagged"
 input_path = sys.argv[1] # input path passed as cli argument
-output_path = sys.argv[2] # output path passed as cli argument
+output_path = sys.argv[2].rstrip("/") # output path passed as cli argument
 token = sys.argv[3] # token passed as cli argument
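For context, the script is driven by three positional CLI arguments, matching the sys.argv reads above; a typical invocation (file names illustrative, not from this commit):

    python scripts/fetch_jars.py projects.csv output <github-token>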

-def fetchJars(input_path, output_path, token):
+class AlreadyExistsException(Exception):
+    def __init__(self, file_path):
+        self.file_path = file_path
+        self.message = file_path + " already exists"
+    def __str__(self):
+        return self.message
+
+def fetch_jars(input_path, output_path, token):
     # this method reads a csv input file, with the projects name and path
     # for each project it downloads the build generated via github releases
     # and moves the builds to the output generated by the framework

     print("Starting build collection")

-    token_user = get_github_user(token)[LOGIN]
-
-    parsed_input = read_input(input_path)
-    parsed_output = read_output(output_path)
-    new_results_file = []
-
-    for project in parsed_input:
-
-        splited_project_path = project[PATH].split('/')
-        project_name = splited_project_path[len(splited_project_path) - 1]
-        github_project = token_user + '/' + project_name
-        print (project_name)
-
-        get_builds_and_wait(github_project)
-
-        releases = get_github_releases(token, github_project)
-
-        # download the releases for the project moving them to the output directories
-        for release in releases:
-            # check if release was generated by the framework
-            if (release[NAME].startswith(RELEASE_PREFIX)):
-                commit_sha = release[NAME].replace(RELEASE_PREFIX, '')
-                print ("Downloading " + commit_sha )
-                try:
-                    download_path = mount_download_path(output_path, project, commit_sha)
-                    download_url = release[ASSETS][0][DOWNLOAD_URL]
-                    download_file(download_url, download_path)
-                    if (commit_sha in parsed_output):
-                        new_results_file.append(parsed_output[commit_sha])
-                    untar_and_remove_file(download_path)
-                    print (download_path + ' is ready')
-                except:
-                    pass
+    try:
+        token_user_name = get_github_user(token)[LOGIN]
+
+        parsed_input = read_csv(input_path, ",")
+        parsed_output = read_csv(output_path + "/data/results.csv", ";")
+
+        projects = map(lambda p: process_project(p, token_user_name), parsed_input)
+        parsed_output_hash = output_to_hash(parsed_output)
+
+        new_results_file = []
+
+        for project in projects:
+            github_project = project[FORK_URL]
+            project_name = project[NAME]
+
+            try:
+                print (project_name)
+
+                get_builds_and_wait(github_project)
+
+                releases = get_github_releases(token, github_project)
+                # download the releases for the project moving them to the output directories
+                for release in releases:
+                    # check if release was generated by the framework
+                    if (release[NAME].startswith(RELEASE_PREFIX)):
+                        commit_sha = strip_release_prefix(release)
+
+                        # preparing and downloading build
+                        print ("Scenario: " + commit_sha)
+
+                        try:
+                            download_build(output_path, project_name, commit_sha, release)
+
+                            new_results_file.append(parsed_output_hash[project_name + commit_sha])
+
+                            print ("Scenario is ready")
+                        except AlreadyExistsException as ae:
+                            new_results_file.append(parsed_output_hash[project_name + commit_sha])
+
+                            print (ae)
+                        except Exception as e:
+                            print ("Error downloading scenario: " + str(e))
+
+                remove_commit_files_without_builds (output_path, project_name)
+            except Exception as e:
+                print ("Error fetching builds for project " + project_name + ": " + str(e))
+
+        save_results_with_builds(output_path, new_results_file)
+    except Exception as e:
+        print (e)
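The two read_csv calls above imply two different formats: the input file is comma-delimited with name and path columns (name may be empty, see process_project below), while data/results.csv is semicolon-delimited with the header that save_results_with_builds writes. Illustrative rows, with made-up values:

    projects.csv:
    name,path
    ,jsoup/jsoup

    data/results.csv:
    project;merge commit;className;method;left modifications;left deletions;right modifications;right deletions
    jsoup;1a2b3c4;org.jsoup.Jsoup;parse;3;0;1;2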

-        remove_commit_files_without_builds (output_path, project_name)
-
-    with open(output_path + "/data/results-with-builds.csv", 'w') as output_file:
-        output_file.write("project;merge commit;className;method;left modifications;left deletions;right modifications;right deletions\n")
-        output_file.write("\n".join(new_results_file))
-        output_file.close()
-
-def read_output(output_path):
-    fo = open(output_path + "/data/results.csv")
-    file = fo.read()
-    fo.close()
-
-    file_out_lines = file.split("\n")
-    return parse_output(file_out_lines)
-
-def parse_output(lines):
-    result = {}
-    for line in lines[1:]:
-        cells = line.split(";")
-        if (len (cells) > 1):
-            result[cells[1]] = line
-    return result
-
-def read_input(input_path):
-    f = open(input_path, "r")
-    file = f.read()
-    f.close()
-
-    brute_lines = file.split("\n")
-    return parse_input(brute_lines)
-
-def parse_input(lines):
-    # parse framework input csv file
-    result = []
-    for line in lines[1:]:
-        cells = line.split(",")
-        if (len (cells) > 1):
-            method = {}
-            method[NAME] = cells[0]
-            method[PATH] = cells[1]
-            result.append(method)
-    return result
+def process_project(data, token_user_name):
+    project = {}
+
+    splited_project_path = data[PATH].split('/')
+    github_project_name = splited_project_path[len(splited_project_path) - 1]
+    github_project = token_user_name + '/' + github_project_name
+    # check if framework used optional custom name
+
+    project[PATH] = data[PATH]
+    project[FORK_URL] = github_project
+    project[NAME] = data[NAME] if data[NAME] else github_project_name
+
+    return project
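A worked example with illustrative values: an input row with path "jsoup/jsoup", no custom name, and token user "alice" yields:

    process_project({"name": "", "path": "jsoup/jsoup"}, "alice")
    # => {"path": "jsoup/jsoup", "fork_url": "alice/jsoup", "name": "jsoup"}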

+def download_build(output_path, project_name, commit_sha, release):
+    print ("Downloading")
+    scenario_path = mount_scenario_path(output_path, project_name, commit_sha)
+
+    if path.exists(scenario_path):
+        tar_path = scenario_path + "result.tar.gz"
+
+        build_path = scenario_path + "build"
+
+        if path.exists(tar_path):
+            raise AlreadyExistsException(tar_path)
+
+        if path.exists(build_path):
+            raise AlreadyExistsException(build_path)
+
+        download_url = get_download_url(release)
+
+        download_file(download_url, tar_path)
+
+        untar_and_remove_file(tar_path, build_path)
+    else:
+        raise Exception("Scenario folder: " + scenario_path + " doesn't exist")
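download_build expects the scenario folder to exist already (it is created by earlier framework steps), and because mount_scenario_path returns a trailing slash, plain concatenation produces paths like the following (sketch, assuming output directory output and project jsoup):

    output/files/jsoup/<commit-sha>/result.tar.gz   # downloaded release asset
    output/files/jsoup/<commit-sha>/build/          # created by untar_and_remove_file

The AlreadyExistsException branches make the download idempotent: a rerun skips scenarios that already have the tarball or an extracted build, while still keeping their rows in the results file.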

+def output_to_hash(parsed_output):
+    parsed_output_hash = {}
+
+    for scenario in parsed_output:
+        parsed_output_hash[scenario[PROJECT] + scenario[MERGE_COMMIT]] = scenario
+
+    return parsed_output_hash
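The hash key is the project name concatenated directly with the merge-commit SHA, which is exactly how fetch_jars looks rows up via parsed_output_hash[project_name + commit_sha]; e.g. (illustrative values):

    parsed_output_hash["jsoup" + "1a2b3c4"]  # key "jsoup1a2b3c4" -> that scenario's results.csv row as a dict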

+def strip_release_prefix(release):
+    return release[NAME].replace(RELEASE_PREFIX, "")
+
+def get_download_url(release):
+    return release[ASSETS][0][BROWSER_DOWNLOAD_URL]
+
+def save_results_with_builds(output_path, new_results_file):
+    with open(output_path + "/data/results-with-builds.csv", 'w') as outputFile:
+        csv_writer = csv.DictWriter(outputFile, delimiter=";",
+            fieldnames=["project","merge commit","className","method","left modifications","left deletions","right modifications","right deletions"])
+
+        csv_writer.writeheader()
+        for scenario in new_results_file:
+            csv_writer.writerow(scenario)
+
+def read_csv(file_path, delimiter):
+    with open(file_path, "r") as input_lines:
+        return list(csv.DictReader(input_lines, delimiter=delimiter))

 def download_file(url, target_path):
     # download file from url
     response = requests.get(url, stream=True)
     if response.status_code == 200:
         with open(target_path, 'wb') as f:
             f.write(response.raw.read())
     else:
         raise Exception("Download request returned status code: " + str(response.status_code))
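Note that download_file buffers the entire body with response.raw.read() even though stream=True is passed; a chunked variant (a sketch, not part of this commit) keeps memory usage flat for large tarballs:

    def download_file_chunked(url, target_path):
        # stream the body in fixed-size chunks instead of reading it all at once
        response = requests.get(url, stream=True)
        if response.status_code == 200:
            with open(target_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        else:
            raise Exception("Download request returned status code: " + str(response.status_code))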

-def mount_download_path(output_path, project, commit_sha):
+def mount_scenario_path(output_path, project_name, commit_sha):
     # mount path where the downloaded build will be moved to
-    return output_path + '/files/' + project[NAME] + '/' + commit_sha + '/result.tar.gz'
+    return output_path + '/files/' + project_name + '/' + commit_sha + "/"

-def untar_and_remove_file(download_path):
-    download_dir = download_path.replace('result.tar.gz', '')
-    subprocess.call(['mkdir', download_dir + 'build'])
-    subprocess.call(['tar', '-xf', download_path, '-C', download_dir + '/build', ])
-    subprocess.call(['rm', download_path])
+def untar_and_remove_file(tar_path, output_path):
+    subprocess.call(['mkdir', output_path])
+    subprocess.call(['tar', '-xf', tar_path, '-C', output_path ])
+    subprocess.call(['rm', tar_path])
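The mkdir/tar/rm subprocess calls assume a Unix environment; a pure-stdlib equivalent (a sketch, not part of this commit) would be:

    import os, tarfile

    def untar_and_remove_file_stdlib(tar_path, output_path):
        # same effect as the mkdir/tar/rm calls, without shelling out
        os.makedirs(output_path, exist_ok=True)
        with tarfile.open(tar_path) as tar:
            tar.extractall(output_path)
        os.remove(tar_path)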

 def get_builds_and_wait(project):
     has_pendent = True
     filtered_builds = []
     while (has_pendent):
         builds = get_travis_project_builds(project)
-        filtered_builds = filter (lambda x: not x["branch"].startswith("untagged"), builds)
+        filtered_builds = filter (lambda x: not x[BRANCH].startswith(UNTAGGED), builds)

         has_pendent = False
         for build in filtered_builds:
-            print (build["state"])
-            has_pendent = has_pendent or (build["state"] != "finished")
+            print (build[BRANCH] + ": " + build[STATE] )
+            has_pendent = has_pendent or (build[STATE] != FINISHED)

         if (has_pendent):
             print ("Waiting 30 seconds")
@@ -145,34 +200,52 @@ def get_builds_and_wait(project):


 def get_travis_project_builds(project):
-    return requests.get(TRAVIS_API + '/repos/' + project + '/builds').json()
+    res = requests.get(TRAVIS_API + '/repos/' + project + '/builds')
+
+    try:
+        res.raise_for_status()
+
+        return res.json()
+    except Exception as e:
+        raise Exception("Error getting travis builds: " + str(e))
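The polling loop in get_builds_and_wait only reads the branch and state fields of each build object, so an illustrative (not verbatim) Travis response it can consume looks like:

    [
        {"branch": "fetchjar-1a2b3c4", "state": "started"},
        {"branch": "untagged-9f8e7d6", "state": "finished"}
    ]

Builds on untagged-* branches are ignored; the loop keeps waiting until every remaining build reports state "finished".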

 def get_github_user(token):
-    return requests.get(GITHUB_API + '/user', headers=get_headers(token)).json()
+    res = requests.get(GITHUB_API + '/user', headers=get_headers(token))
+    try:
+        res.raise_for_status()
+
+        return res.json()
+    except Exception as e:
+        raise Exception("Error getting github user: " + str(e))

 def get_github_releases(token, project):
-    return requests.get(GITHUB_API + '/repos/' + project + '/releases', headers=get_headers(token)).json()
+    res = requests.get(GITHUB_API + '/repos/' + project + '/releases', headers=get_headers(token))
+    try:
+        res.raise_for_status()
+
+        return res.json()
+    except Exception as e:
+        raise Exception("Error getting github releases: " + str(e))
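The only release fields the script touches are name and assets[0].browser_download_url, so a matching GitHub release object would look roughly like (illustrative values):

    {
        "name": "fetchjar-1a2b3c4",
        "assets": [
            {"browser_download_url": "https://github.com/alice/jsoup/releases/download/fetchjar-1a2b3c4/result.tar.gz"}
        ]
    }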

 def get_headers(token):
     return {
         "Authorization": "token " + token
     }


 def remove_commit_files_without_builds (output_path, project_name):
     files_path = output_path + "/files/" + project_name + "/"

-    if (os.path.exists(files_path)):
-        commit_dirs = os.listdir(files_path)
+    if (path.exists(files_path)):
+        commit_dirs = listdir(files_path)

         for directory in commit_dirs:
             commit_dir = files_path + directory
             build_dir = commit_dir + "/build"

-            if (not os.path.exists(build_dir)):
+            if (not path.exists(build_dir)):
                 shutil.rmtree(commit_dir)

-        if (len (os.listdir(files_path)) == 0 ):
+        if (len (listdir(files_path)) == 0 ):
             shutil.rmtree(files_path)
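This cleanup removes scenario directories that never received a build (for example, because the release download failed) and then removes the project directory itself if it ends up empty; e.g. (illustrative):

    output/files/jsoup/1a2b3c4/build/   # has a build -> kept
    output/files/jsoup/5d6e7f8/         # no build/  -> removed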

-fetchJars(input_path, output_path, token)
+fetch_jars(input_path, output_path, token)
