Skip to content

Commit

Permalink
GHA merge: analyzer (#381)
Browse files Browse the repository at this point in the history
* Check cancelled status, add test cases.

* Add test cases and fix gradle detection issue

* Fix maven failed test parsing, make JavaGradleAnalyzer work with GitHub Actions.

* Sync analyzers, add more test cases.

* Add test cases for Ant analyzer

* Improve Gradle detection in get_build_system_from_build_command. Improve Gradle and Ant analyzers. Add test cases for all build systems.

* Improve GitHub Actions' primary language detection. Fix Python analyzer (pytest) and add test cases.

* Improve Python analyzer and Java Other analyzer. Add test cases for Python and Java Other builds.

* Make analyze_primary_language work with node_js logs.
Add GitHub Actions build logs to test Javascript analyzer.

* Replace Travis' logs with GitHub Actions' logs

* Improve could_not_resolve_dep checking. Replace Travis' logs with GitHub Actions' logs

* Use the two reproducible (no spawner) artifacts to test ResultComparer

* Clean up
Add back tr_log_setup_time attribute, fix typo, fix problem with print_result, and make trigger_sha optional because analyzer can find trigger_sha using job_id.

* Fix test cases and fold's duration bug.

* Add result_comparer mismatches test

* Test tr_log_setup_time and tr_log_buildduration

* Fix tr_log_setup_time for some gradle build and fix analyze_primary_language.

* case-sensitive

* Fix style/linting issues

* Fix analyzer's `OUT_OF_FOLD` and unknown status bugs (#42)

* Fix analyzer

* Add empty line

* Adds GHA functionality to analyzer using ABCs

* Disambiguate tests/data for Travis

* Modifications for test_github.py

* Modify travis tests

* Remove unnecessary link

---------

Co-authored-by: Kevin Guan <[email protected]>
Co-authored-by: Kevin Guan <[email protected]>
  • Loading branch information
3 people authored Jun 29, 2023
1 parent 454d148 commit a067a39
Show file tree
Hide file tree
Showing 2,209 changed files with 498,969 additions and 273 deletions.
42 changes: 29 additions & 13 deletions bugswarm/analyzer/analyzer.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,56 @@
import pprint

from .dispatcher import Dispatcher
from .gha_dispatcher import GHADispatcher
from .result_comparer import ResultComparer
from .travis_dispatcher import TravisDispatcher

LOAD_JSON = 1


class Analyzer(object):
def __init__(self):
self.dispatcher = Dispatcher()
self.travis_dispatcher = TravisDispatcher()
self.gha_dispatcher = GHADispatcher()
self.comparer = ResultComparer()

def analyze_single_log(self, log_path, job_id, build_system=None, trigger_sha=None, repo=None, print_result=False,
mining=True):
def analyze_single_log(self, log_path, job_id, ci_service, build_system=None,
trigger_sha=None, repo=None, print_result=False, mining=True):
"""
When mining is True and build_system is None, Analyzer will get build system from Travis and GitHub API.
Otherwise, Analyzer will get build_system from BugSwarm API.
"""
if not mining and not build_system:
# Not in mining mode, and we don't have build_system.
build_system = self.dispatcher.get_build_system_from_bugswarm_database(job_id)
build_system = self.travis_dispatcher.get_build_system_from_bugswarm_database(job_id)

result = self.dispatcher.analyze(log_path, job_id, build_system, trigger_sha, repo)
if ci_service == 'github':
dispatcher = self.gha_dispatcher
elif ci_service == 'travis':
dispatcher = self.travis_dispatcher
else:
raise ValueError('`ci_service` must be one of "travis" or "github".')

result = dispatcher.analyze(log_path, job_id, build_system, trigger_sha, repo)
if print_result:
pprint.pprint(result)

return result

def compare_single_log(self, reproduced, orig, job_id, build_system=None, trigger_sha=None, repo=None,
print_result=False, mining=True):
def compare_single_log(self, reproduced, orig, job_id, ci_service, build_system=None,
trigger_sha=None, repo=None, print_result=False, mining=True):
if not mining and not build_system:
# Not in mining mode, and we don't have build_system.
build_system = self.dispatcher.get_build_system_from_bugswarm_database(job_id)
build_system = self.travis_dispatcher.get_build_system_from_bugswarm_database(job_id)

if ci_service == 'github':
dispatcher = self.gha_dispatcher
elif ci_service == 'travis':
dispatcher = self.travis_dispatcher
else:
raise ValueError('`ci_service` must be one of "travis" or "github".')

reproduced_result = self.dispatcher.analyze(reproduced, job_id, build_system, trigger_sha, repo)
original_result = self.dispatcher.analyze(orig, job_id, build_system, trigger_sha, repo)
reproduced_result = dispatcher.analyze(reproduced, job_id, build_system, trigger_sha, repo)
original_result = dispatcher.analyze(orig, job_id, build_system, trigger_sha, repo)
match, mismatched_attributes = ResultComparer.compare_attributes(reproduced_result, original_result)
if print_result:
pprint.pprint(match)
Expand All @@ -49,9 +65,9 @@ def force_re_analyze_travis_log(self, orig, job_id, build_system=None, trigger_s

if not mining and not build_system:
# Not in mining mode, and we don't have build_system.
build_system = self.dispatcher.get_build_system_from_bugswarm_database(job_id)
build_system = self.travis_dispatcher.get_build_system_from_bugswarm_database(job_id)

result = self.dispatcher.analyze(orig, job_id, build_system, trigger_sha, repo, 1)
result = self.travis_dispatcher.analyze(orig, job_id, build_system, trigger_sha, repo, 1)
if print_result:
pprint.pprint(result)

Expand Down
145 changes: 145 additions & 0 deletions bugswarm/analyzer/base_log_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import abc
import re


class LogAnalyzerABC(abc.ABC):
    """Abstract base class for build-log analyzers.

    Subclasses implement :meth:`custom_analyze` to extract CI-service- and
    build-system-specific attributes onto ``self``; :meth:`analyze` then
    finalizes derived values, and :meth:`output` exports everything as a
    flat ``tr_*`` dictionary. Attributes that were never set during
    analysis are reported as ``'NA'`` (or ``0`` for the test counters).
    """

    def __init__(self, primary_language, folds, job_id):
        """
        :param primary_language: Primary programming language of the project.
        :param folds: Pre-split log sections ('folds') to be analyzed.
        :param job_id: ID of the CI job whose log is being analyzed.
        """
        self.primary_language = primary_language
        self.job_id = job_id
        self.folds = folds
        # Sentinel fold name for log lines that fall outside any explicit fold.
        self.OUT_OF_FOLD = 'out_of_fold'
        self.test_lines = []
        self.frameworks = []
        self.tests_run = False
        self.analyzer = 'plain'
        self.tests_failed = []
        self.initialized_tests = False
        self.err_msg = []
        self.err_lines = []
        self.connection_lines = []

    def analyze(self):
        """Run the subclass-specific analysis, then finalize the results."""
        self.custom_analyze()
        self.pre_output()
        self.sanitize_output()

    @abc.abstractmethod
    def custom_analyze(self):
        """
        Subclasses must implement their own analyses in this method.
        They must also ensure that they call `super().custom_analyze()` so any mixins after them in
        the MRO get called as well.
        """
        pass

    # Assign function values to variables before outputting.
    def pre_output(self):
        if hasattr(self, 'bool_tests_failed'):
            self.did_tests_fail = self.bool_tests_failed()

    # Perform last-second sanitization of variables. Can be used to guarantee invariants.
    def sanitize_output(self):
        # A full build cannot be shorter than its test phase; drop the build
        # duration rather than report an impossible value.
        if hasattr(self, 'pure_build_duration') and hasattr(self, 'test_duration'):
            if self.pure_build_duration < self.test_duration:
                del self.pure_build_duration

    def add_framework(self, framework):
        """Record a detected test framework, ignoring duplicates."""
        if framework not in self.frameworks:
            self.frameworks.append(framework)

    # pre-init values so we can sum-up in case of aggregated test sessions (always use calc_ok_tests when you use this)
    def init_tests(self):
        if not self.initialized_tests:
            self.test_duration = 0
            self.num_tests_run = 0
            self.num_tests_failed = 0
            self.num_tests_ok = 0
            self.num_tests_skipped = 0
            self.initialized_tests = True

    # For non-aggregated reporting, at the end (always use this when you use init_tests)
    def uninit_ok_tests(self):
        # NOTE(review): assumes num_tests_ok was initialized via init_tests()
        # whenever the other two counters exist — confirm in subclasses.
        if hasattr(self, 'num_tests_run') and hasattr(self, 'num_tests_failed'):
            self.num_tests_ok += self.num_tests_run - self.num_tests_failed

    # The output is in seconds, even when it takes longer than a minute
    @staticmethod
    def convert_plain_time_to_seconds(s):
        """Parse a duration string like ``'12.34s'`` into seconds (rounded to 2 decimals).

        Returns 0 when no parsable seconds value is found. Previously, a
        string such as ``'1m 2s'`` made the greedy match capture ``'1m 2'``
        and the ``float()`` call raised an uncaught ValueError, aborting the
        whole analysis; malformed values now fall back to 0 instead.
        """
        match = re.search(r'(.+)s', s, re.M)
        if match:
            try:
                return round(float(match.group(1)), 2)
            except ValueError:
                # Matched text was not a plain number (e.g. '1m 2').
                return 0
        return 0

    # Returns a dictionary containing the analysis results. All of the attributes are extracted by build log analysis.
    def output(self):
        mapping = {
            # The build ID of the Travis build.
            'tr_build_id': 'build_id',
            # The job ID of the build job under analysis.
            'tr_job_id': 'job_id',
            # The primary programming language.
            'tr_log_lan': 'primary_language',
            # The overall return status of the build.
            'tr_log_status': 'status',
            # The setup time before the script phase (the actual build) starts, in seconds.
            'tr_log_setup_time': 'setup_time_before_build',
            # The build log Analyzer that was invoked for analysis of this build.
            'tr_log_analyzer': 'analyzer',
            # The testing frameworks ran.
            'tr_log_frameworks': 'frameworks',
            # Whether tests were run.
            'tr_log_bool_tests_ran': 'tests_run',
            # Whether tests failed.
            'tr_log_bool_tests_failed': 'did_tests_fail',
            # Number of tests that succeeded.
            'tr_log_num_tests_ok': 'num_tests_ok',
            # Number of tests that failed.
            'tr_log_num_tests_failed': 'num_tests_failed',
            # Number of tests that ran in total.
            'tr_log_num_tests_run': 'num_tests_run',
            # Number of tests that were skipped.
            'tr_log_num_tests_skipped': 'num_tests_skipped',
            # Names of the tests that failed.
            'tr_log_tests_failed': 'tests_failed',
            # Duration of the running the tests, in seconds.
            'tr_log_testduration': 'test_duration',
            # Duration of running the build command like maven or ant, in seconds. (If present, this duration should be
            # longer than 'tr_log_testduration' since it includes this phase.)
            'tr_log_buildduration': 'pure_build_duration',

            # Added: Error messages in log.
            'tr_err_msg': 'err_msg',
            # Added: Build image provisioned date and time.
            'tr_build_image': 'build_image',
            # Added: (Travis) Worker instance info in log.
            'tr_worker_instance': 'worker_instance',
            # Added: (Travis) Using worker line in log, if it exists, will be first line of log.
            'tr_using_worker': 'using_worker',
            # Added: Capturing the line that specifies operating system.
            'tr_os': 'os',
            # Added: Capturing the lines that likely mention connection problems, dependencies, or endpoints that no
            # longer exist online.
            'tr_connection_lines': 'connection_lines',
            # Added: Capturing the lines that mention could not resolve dependencies.
            'tr_could_not_resolve_dep': 'could_not_resolve_dep',
            # Added: (Travis) Cookbook version in log.
            'tr_cookbook': 'cookbook',
            # Added: Invalid log tells whether the original log downloaded is an invalid log (error message).
            'tr_invalid_log': 'invalid_log',
            # Added: The build system used by the project as indicated by the build log.
            'tr_build_system': 'build_system',
        }
        output = {}
        for key in mapping:
            if not hasattr(self, mapping[key]):
                # Test counters default to 0; everything else to 'NA'.
                if key in ['tr_log_num_tests_run', 'tr_log_num_tests_failed']:
                    output[key] = 0
                else:
                    output[key] = 'NA'
            else:
                attr = getattr(self, mapping[key])
                # Lists (frameworks, failed tests, error lines) are exported
                # as a single '#'-joined string.
                if isinstance(attr, list):
                    output[key] = '#'.join(attr)
                else:
                    output[key] = attr
        return output
25 changes: 19 additions & 6 deletions bugswarm/analyzer/entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,30 @@ def main(argv=None):
# Log the current version of this BugSwarm component.
log.info(get_current_component_version_message('Analyzer'))

mode, reproduced, orig, log_filename, print_result, job_id, build_system, trigger_sha, repo, mining =\
mode, reproduced, orig, log_filename, print_result, job_id, build_system, trigger_sha, repo, mining, ci_service =\
_validate_input(argv)

analyzer = Analyzer()
if mode == 0:
analyzer.compare_single_log(reproduced, orig, job_id, build_system, trigger_sha, repo, print_result,
analyzer.compare_single_log(reproduced, orig, job_id, ci_service, build_system, trigger_sha, repo, print_result,
mining=mining)
elif mode == 1:
analyzer.analyze_single_log(log_filename, job_id, build_system, trigger_sha, repo, print_result, mining=mining)
analyzer.analyze_single_log(
log_filename,
job_id,
ci_service,
build_system,
trigger_sha,
repo,
print_result,
mining=mining)
else:
raise Exception('Unsupported mode: {}.'.format(mode))


def _validate_input(argv):
shortopts = 'r:o:l:j:b:t:'
longopts = 'reproduced= orig= log= job_id= build_system= trigger_sha= repo= mining= print '.split()
shortopts = 'r:o:l:j:b:t:c:'
longopts = 'reproduced= orig= log= job_id= build_system= trigger_sha= repo= mining= ci_service= print '.split()
mode = -1
reproduced = None
orig = None
Expand All @@ -42,6 +50,7 @@ def _validate_input(argv):
trigger_sha = None
repo = None
mining = True
ci_service = 'github'

try:
optlist, args = getopt.getopt(argv[1:], shortopts, longopts)
Expand All @@ -68,6 +77,8 @@ def _validate_input(argv):
print_result = True
if opt == '--mining':
mining = False if arg.lower() in ['0', 'off', 'false'] else True
if opt in ('-c', '--ci_service'):
ci_service = arg

if reproduced and orig:
if job_id and '.log' in reproduced and '.log' in orig:
Expand All @@ -78,7 +89,8 @@ def _validate_input(argv):
else:
print_usage()
sys.exit(2)
return mode, reproduced, orig, log_filename, print_result, job_id, build_system, trigger_sha, repo, mining
return (mode, reproduced, orig, log_filename, print_result, job_id,
build_system, trigger_sha, repo, mining, ci_service)


def print_usage():
Expand Down Expand Up @@ -106,6 +118,7 @@ def print_usage():
log.info('{:<30}{:<30}'.format('-t, --trigger_sha', 'trigger sha for log'))
log.info('{:<30}{:<30}'.format('--repo', 'repository of the project'))
log.info('{:<30}{:<30}'.format('--mining', 'use false to turn off mining mode, default is on.'))
log.info('{:<30}{:<30}'.format('-c, --ci_service', 'ci service for log ("travis" or "github")'))


if __name__ == '__main__':
Expand Down
Loading

0 comments on commit a067a39

Please sign in to comment.