From be714feca45d1f3aec5abf96ed06e393dfcbcc09 Mon Sep 17 00:00:00 2001 From: alexbrozych <120374931+alexbrozych@users.noreply.github.com> Date: Thu, 12 Jan 2023 11:30:25 +0000 Subject: [PATCH] feat: Add GitLab CE scanning (#48) * feat: Add GitLab CE scanning and testing * feat: Allow overriding the GitLab URL to scan instances other than the primary GitLab instance * feat: Add testing for GitLab CE scanning * feat: Add testing for GitLab scanning * feat: Move GitLab's args into their own group * Renames GitLab's `--org` and `--pat` options to `--group` and `--access-token` respectively. * Adds `--update-ca-store` for updating the cert store when running inside docker. * Update README.md * Update README.md * feat: Add flag to disable HTTPS validation * feat: Make disabling HTTPS validation apply to all providers * feat: Add testing for disabling HTTPS Co-authored-by: SimonGurney --- .github/workflows/behave.yml | 2 ++ README.md | 23 ++++++++++++-- argparsing.py | 39 +++++++++++++++++++++-- features/environment.py | 14 ++++++++- features/helper.py | 39 +++++++++++++++++++++-- features/secret_detection.feature | 36 +++++++++++++++++++++- main.py | 11 +++++++ repos.py | 11 +++++-- tasks.py | 51 +++++++++++++++++++++++-------- 9 files changed, 201 insertions(+), 25 deletions(-) diff --git a/.github/workflows/behave.yml b/.github/workflows/behave.yml index 2f43abd..4107428 100644 --- a/.github/workflows/behave.yml +++ b/.github/workflows/behave.yml @@ -18,8 +18,10 @@ jobs: - name: Install and run behave run: | docker run \ + -e SKIP_IN_RUNNER="" \ -e SECRETMAGPIE_GITHUB_PAT=${{ secrets.SECRETMAGPIE_GITHUB_PAT }} \ -e SECRETMAGPIE_ADO_PAT=${{ secrets.SECRETMAGPIE_ADO_PAT }} \ + -e SECRETMAGPIE_GITLAB_PAT=${{ secrets.SECRETMAGPIE_GITLAB_PAT }} \ --entrypoint sh \ secret-magpie \ -c "pip install behave; python -m behave" diff --git a/README.md b/README.md index 9d1eaf7..2983c13 100644 --- a/README.md +++ b/README.md @@ -73,8 +73,17 @@ docker cp 'container':/app/results.[csv/json]
/host/path/target Alternatively you mount the volume the results folder and direct output to it ``` -docker -v /localpath:/app/results run ... blah ... --out results/results +docker run -v /localpath:/app/results punksecurity/secret-magpie --out results/results ``` +# Passing certificates to Docker +If you're running our tool inside Docker, you may find you need to pass in CA certificates from the host. + +This can be achieved using the following command: + +``` +docker run -v /path/to/your/certificates:/usr/local/share/ca-certificates/ punksecurity/secret-magpie --update-ca-store +``` + ## Running the tool locally If you prefer not to use Docker then you will need to manually install the following: @@ -128,10 +137,20 @@ options: --no-stats Do not output stats summary --ignore-branches-older-than IGNORE_BRANCHES_OLDER_THAN Ignore branches whose last commit date is before this value. Format is Pythons's expected ISO format e.g. 2020-01-01T00:00:00+00:00 -github/gitlab/azuredevops: + --update-ca-store If you're running secret-magpie-cli within Docker and need to provide an external CA certificate to trust, pass this option to cause it to update the container's certificate store. + --dont-validate-https + Disables HTTPS validation for APIs/cloning. + +github/azuredevops: --org ORG Organisation name to target --pat PAT Personal Access Token for API access and cloning +gitlab: + --group GROUP The GitLab Group to import repositories from + --access-token ACCESS_TOKEN + The access token to use for accessing GitLab. + --gitlab-url GITLAB_URL + URL of the GitLab instance to run against. 
(default: https://gitlab.com) bitbucket: --workspace WORKSPACE --username USERNAME diff --git a/argparsing.py b/argparsing.py index 60e9083..9660668 100644 --- a/argparsing.py +++ b/argparsing.py @@ -43,12 +43,30 @@ def error(self, message): choices=["github", "gitlab", "bitbucket", "azuredevops", "filesystem"], ) -github_group = parser.add_argument_group("github/gitlab/azuredevops") +github_group = parser.add_argument_group("github/azuredevops") github_group.add_argument("--org", type=str, help="Organisation name to target") github_group.add_argument( "--pat", type=str, help="Personal Access Token for API access and cloning" ) +gitlab_group = parser.add_argument_group("gitlab") +gitlab_group.add_argument( + "--group", + type=str, + help="The GitLab Group to import repositories from", +) +gitlab_group.add_argument( + "--access-token", + type=str, + help="The access token to use for accessing GitLab.", +) +gitlab_group.add_argument( + "--gitlab-url", + type=str, + default="https://gitlab.com", + help="URL of the GitLab instance to run against. (default: %(default)s)", +) + bitbucket_group = parser.add_argument_group("bitbucket") bitbucket_group.add_argument("--workspace") bitbucket_group.add_argument("--username") @@ -73,6 +91,7 @@ def error(self, message): action="store_true", help="Don't remove checked-out repositories upon completion", ) + parser.add_argument( "--out-format", type=str, @@ -130,6 +149,18 @@ def error(self, message): help="Ignore branches whose last commit date is before this value. Format is Pythons's expected ISO format e.g. 
2020-01-01T00:00:00+00:00", ) +parser.add_argument( + "--update-ca-store", + action="store_true", + help="If you're running secret-magpie-cli within Docker and need to provide an external CA certificate to trust, pass this option to cause it to update the container's certificate store.", +) + +parser.add_argument( + "--dont-validate-https", + action="store_true", + help="Disables HTTPS validation for APIs/cloning.", +) + def parse_args(): args = parser.parse_args() @@ -141,8 +172,10 @@ def parse_args(): if ("github" == args.provider) and (args.pat is None or args.org is None): parser.error("github requires --pat and --org") - if ("gitlab" == args.provider) and (args.pat is None or args.org is None): - parser.error("gitlab requires --pat and --org") + if ("gitlab" == args.provider) and ( + args.access_token is None or args.group is None + ): + parser.error("gitlab requires --access-token and --group") if ("azuredevops" == args.provider) and (args.pat is None or args.org is None): parser.error("azuredevops requires --pat and --org") diff --git a/features/environment.py b/features/environment.py index 8914ff1..63bf9fa 100644 --- a/features/environment.py +++ b/features/environment.py @@ -8,7 +8,7 @@ def before_tag(context, tag): - tag_parts = tag.split(".") + tag_parts = tag.split(".", 2) match tag_parts[0].lower(): case "fixture": if len(tag_parts) > 1: @@ -31,6 +31,9 @@ def before_tag(context, tag): context.repo_type = "gitlab" context.org = tag_parts[1] + if len(tag_parts) > 2: + context.url = tag_parts[2] + # PAT is provided via environment variables context.pat = os.environ["SECRETMAGPIE_GITLAB_PAT"] @@ -57,6 +60,9 @@ def before_tag(context, tag): context.args.append("--no-cleanup") + case "pat": + context.pat = os.environ.get(tag_parts[1], "") + def after_tag(context, tag): tag_parts = tag.split(".") @@ -69,3 +75,9 @@ def after_tag(context, tag): except: time.sleep(10) continue + + +def before_scenario(context, scenario): + if "skipinrunner" in 
scenario.effective_tags: + if os.environ.get("SKIP_IN_RUNNER") != None: + scenario.skip("Skipping in GitHub Action Runner") diff --git a/features/helper.py b/features/helper.py index 43985e9..621b9aa 100644 --- a/features/helper.py +++ b/features/helper.py @@ -88,11 +88,16 @@ def run_secret_magpie(context, engines, outformat="csv", args=[]): "--out-format", outformat, "gitlab", - "--org", + "--group", context.org, - "--pat", + "--access-token", context.pat, ] + + try: + param_list.extend(["--gitlab-url", context.url]) + except: + pass case "azuredevops": param_list = [ "python", @@ -146,6 +151,9 @@ def run_secret_magpie(context, engines, outformat="csv", args=[]): if "❌" in proc.stdout: raise AssertionError(proc.stdout) + if "warning" in proc.stdout: + raise AssertionError(proc.stdout) + stdout = proc.stdout.split("\n") context.stdout = stdout[10:][:1] @@ -240,6 +248,33 @@ def step_impl( run_secret_magpie(context, engines, outformat=format, args=args) +@when( + "we run secret-magpie-cli in {branch_toggle} branch mode, https validation {https_validation}, ignoring commits older than {threshold_date}, extra context {extra_context}, secret storing {secret_toggle}, output format {format} and engines: {engines}" +) +def step_impl( + context, + branch_toggle, + https_validation, + threshold_date, + extra_context, + secret_toggle, + format, + engines, +): + args = [] + if https_validation == "disabled": + args.append("--dont-validate-https") + if threshold_date != "None": + args.append(f"--ignore-branches-older-than={threshold_date}") + if extra_context == "enabled": + args.append("--extra-context") + if secret_toggle == "disabled": + args.append("--dont-store-secret") + if branch_toggle == "single": + args.append("--single-branch") + run_secret_magpie(context, engines, outformat=format, args=args) + + @then("secret-magpie-cli's output will be") def step_impl(context): stdout = context.stdout diff --git a/features/secret_detection.feature 
b/features/secret_detection.feature index 54334f8..5f4b274 100644 --- a/features/secret_detection.feature +++ b/features/secret_detection.feature @@ -52,7 +52,7 @@ Feature: Validate secret detection against various engines. Then there will be 1 secrets detected @github.secretmagpie-testing - Scenario: Validate that we can detect secrets for remote repos + Scenario: Validate that we can detect secrets for a GitHub remote When we run secret-magpie-cli with engines: all Then there will be 4 secrets detected @@ -77,3 +77,37 @@ Feature: Validate secret detection against various engines. | mode | | single | | multi | + + @skipinrunner + @gitlab.secretmagpie-testing.https://gitlab.punksecurity.io + @pat.SECRETMAGPIE_GITLAB_CE_PAT + Scenario: Validate that we can detect secrets for GitLab CE remote + When we run secret-magpie-cli with engines: all + Then there will be 4 secrets detected + + @gitlab.secretmagpie-testing + Scenario: Validate that we can detect secrets for GitLab remote + When we run secret-magpie-cli with engines: all + Then there will be 4 secrets detected + + @github.secretmagpie-testing + Scenario: Ensure that we still detect secrets on GitHub remote works when we turn off HTTPS validation + When we run secret-magpie-cli in multi branch mode, https validation disabled, ignoring commits older than None, extra context disabled, secret storing enabled, output format csv and engines: all + Then there will be 4 secrets detected + + @gitlab.secretmagpie-testing + Scenario: Ensure that we still detect secrets on GitLab remote works when we turn off HTTPS validation + When we run secret-magpie-cli in multi branch mode, https validation disabled, ignoring commits older than None, extra context disabled, secret storing enabled, output format csv and engines: all + Then there will be 4 secrets detected + + @skipinrunner + @gitlab.secretmagpie-testing.https://gitlab.punksecurity.io + @pat.SECRETMAGPIE_GITLAB_CE_PAT + Scenario: Ensure that we still detect secrets on 
GitLab CE remote works when we turn off HTTPS validation + When we run secret-magpie-cli in multi branch mode, https validation disabled, ignoring commits older than None, extra context disabled, secret storing enabled, output format csv and engines: all + Then there will be 4 secrets detected + + @azuredevops.PunkSecurity + Scenario: Ensure that we still detect secrets on AzureDevOps remote works when we turn off HTTPS validation + When we run secret-magpie-cli in multi branch mode, https validation disabled, ignoring commits older than None, extra context disabled, secret storing enabled, output format csv and engines: all + Then there will be 4 secrets detected diff --git a/main.py b/main.py index cb15f6a..567dc3c 100644 --- a/main.py +++ b/main.py @@ -9,12 +9,22 @@ import output import datetime import time +import os +import subprocess # nosec blacklist +import urllib3 if __name__ == "__main__": + urllib3.disable_warnings() print(argparsing.banner) args = argparsing.parse_args() cleanup = not (args.no_cleanup or "filesystem" == args.provider) + with open(os.devnull, "wb") as devnull: + if args.update_ca_store: + subprocess.call( # nosec subprocess_without_shell_equals_true start_process_with_partial_path + ["update-ca-certificates"], stdout=devnull, stderr=devnull + ) + threshold_date = None if args.ignore_branches_older_than != None: try: @@ -44,6 +54,7 @@ extra_context=args.extra_context, cleanup=cleanup, threshold_date=threshold_date, + validate_https=not args.dont_validate_https, ) pool = ThreadPool(args.parallel_repos) results = pool.imap_unordered(f, repos) diff --git a/repos.py b/repos.py index ff94397..cd04cbb 100644 --- a/repos.py +++ b/repos.py @@ -21,13 +21,18 @@ def __init__(self, clone_url, html_url, name, credentials: RepoCredentials) -> N self.name = name self.clone_url = clone_url - def clone_repo(self): + def clone_repo(self, validate_https=True): path = sha256(self.clone_url.encode("utf-8")).hexdigest()[0:8] if self.clone_url.lower()[0:8] != 
"https://": raise Exception(f"clone url not in expected format: '{self.clone_url}'") target = f"https://{self.credentials.get_auth_string()}@{self.clone_url[8:]}" - GitRepo.clone_from(target, path).remotes[0].fetch() + if validate_https: + GitRepo.clone_from(target, path).remotes[0].fetch() + else: + GitRepo.clone_from(target, path, c="http.sslVerify=false").remotes[ + 0 + ].fetch() return path def link_to_file(self, commit_hash, file_path, line_num): @@ -60,7 +65,7 @@ class FilesystemRepo(Repo): def __init__(self, clone_url): super().__init__(clone_url, "", clone_url, None) - def clone_repo(self): + def clone_repo(self, validate_https=False): return self.clone_url def link_to_file(self, commit_hash, file_path, line_num): diff --git a/tasks.py b/tasks.py index 110fa44..85695dd 100644 --- a/tasks.py +++ b/tasks.py @@ -88,10 +88,11 @@ def process_repo( extra_context=False, cleanup=True, threshold_date=None, + validate_https=True, ): out = [] try: - path = repo.clone_repo() + path = repo.clone_repo(validate_https=validate_https) except: return [ProcessRepoResult(repo, "FAIL", "Could not clone")] @@ -139,8 +140,13 @@ def process_repo( return ret -def get_repos_from_bitbucket(workspace, username, password): - instance = bitbucket.Cloud(username=username, password=password, cloud=True) +def get_repos_from_bitbucket(workspace, username, password, dont_validate_https): + instance = bitbucket.Cloud( + username=username, + password=password, + cloud=True, + verify_ssl=not dont_validate_https, + ) workspace = instance.workspaces.get(workspace) for repo in workspace.repositories.each(): @@ -159,8 +165,8 @@ def get_repos_from_bitbucket(workspace, username, password): ) -def get_repos_from_github(org, pat): - g = Github(pat) +def get_repos_from_github(org, pat, dont_validate_https): + g = Github(pat, verify=not dont_validate_https) organisation = g.get_organization(org) repos = organisation.get_repos() @@ -173,7 +179,7 @@ def get_repos_from_github(org, pat): ) -def 
get_repos_from_gitlab(org, pat): +def get_repos_from_gitlab(org, pat, url, dont_validate_https): def get_projects_from_group(g, group): for project in group.projects.list(all=True): yield project @@ -182,7 +188,7 @@ def get_projects_from_group(g, group): for project in get_projects_from_group(g, group): yield project - g = Gitlab(private_token=pat) + g = Gitlab(private_token=pat, url=url, ssl_verify=not dont_validate_https) group = g.groups.get(org, lazy=True) repos = get_projects_from_group(g, group) @@ -195,14 +201,16 @@ def get_projects_from_group(g, group): ) -def get_repos_from_ado(org, pat): +def get_repos_from_ado(org, pat, dont_validate_https): headers = { "Accept": "application/json", "Authorization": f"Basic {b64encode(f':{pat}'.encode('ascii')).decode()}", } response = requests.get( - f"https://dev.azure.com/{org}/_apis/projects", headers=headers + f"https://dev.azure.com/{org}/_apis/projects", + headers=headers, + verify=not dont_validate_https, ) if response.content == b"": @@ -214,6 +222,7 @@ def get_repos_from_ado(org, pat): response = requests.get( f"https://dev.azure.com/{org}/{project}/_apis/git/repositories", headers=headers, + verify=not dont_validate_https, ) if response.content == b"": continue @@ -233,17 +242,33 @@ def get_repos_from_filesystem(path): def get_repos(provider, **kwargs): if "github" == provider: - return get_repos_from_github(kwargs["org"], kwargs["pat"]) + return get_repos_from_github( + kwargs["org"], + kwargs["pat"], + kwargs["dont_validate_https"], + ) if "gitlab" == provider: - return get_repos_from_gitlab(kwargs["org"], kwargs["pat"]) + return get_repos_from_gitlab( + kwargs["group"], + kwargs["access_token"], + kwargs["gitlab_url"], + kwargs["dont_validate_https"], + ) if "azuredevops" == provider: - return get_repos_from_ado(kwargs["org"], kwargs["pat"]) + return get_repos_from_ado( + kwargs["org"], + kwargs["pat"], + kwargs["dont_validate_https"], + ) if "bitbucket" == provider: return get_repos_from_bitbucket( - 
kwargs["workspace"], kwargs["username"], kwargs["password"] + kwargs["workspace"], + kwargs["username"], + kwargs["password"], + kwargs["dont_validate_https"], ) if "filesystem" == provider: return get_repos_from_filesystem(kwargs["path"])