diff --git a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml index bd55d68..d7fd860 100644 --- a/.github/workflows/bandit.yml +++ b/.github/workflows/bandit.yml @@ -1,15 +1,15 @@ -name: bandit -on: [pull_request] -jobs: - sast: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.10 - uses: actions/setup-python@v1 - with: - python-version: 3.10.9 - - name: Install Bandit - run: pip install bandit - - name: Run bandit - run: bandit -r . +name: bandit +on: [pull_request] +jobs: + sast: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.10 + uses: actions/setup-python@v1 + with: + python-version: "3.10" + - name: Install Bandit + run: pip install bandit + - name: Run bandit + run: bandit -r . diff --git a/.github/workflows/behave.yml b/.github/workflows/behave.yml index 4107428..224d387 100644 --- a/.github/workflows/behave.yml +++ b/.github/workflows/behave.yml @@ -1,27 +1,27 @@ -name: behave -on: [pull_request] -jobs: - tests: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Docker Buildx - id: buildx - uses: docker/setup-buildx-action@v1 - - name: Run Buildx - run: | - docker buildx build . \ - --load \ - --progress=plain \ - --tag secret-magpie \ - --platform linux/amd64 - - name: Install and run behave - run: | - docker run \ - -e SKIP_IN_RUNNER="" \ - -e SECRETMAGPIE_GITHUB_PAT=${{ secrets.SECRETMAGPIE_GITHUB_PAT }} \ - -e SECRETMAGPIE_ADO_PAT=${{ secrets.SECRETMAGPIE_ADO_PAT }} \ - -e SECRETMAGPIE_GITLAB_PAT=${{ secrets.SECRETMAGPIE_GITLAB_PAT }} \ - --entrypoint sh \ - secret-magpie \ - -c "pip install behave; python -m behave" +name: behave +on: [pull_request] +jobs: + tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v1 + - name: Run Buildx + run: | + docker buildx build . \ + --load \ + --progress=plain \ + --tag secret-magpie \ + --platform linux/amd64 + - name: Install and run behave + run: | + docker run \ + -e SKIP_IN_RUNNER="" \ + -e SECRETMAGPIE_GITHUB_PAT=${{ secrets.SECRETMAGPIE_GITHUB_PAT }} \ + -e SECRETMAGPIE_ADO_PAT=${{ secrets.SECRETMAGPIE_ADO_PAT }} \ + -e SECRETMAGPIE_GITLAB_PAT=${{ secrets.SECRETMAGPIE_GITLAB_PAT }} \ + --entrypoint sh \ + secret-magpie \ + -c "pip install -r test-requirements.txt; python -m behave" diff --git a/README.md b/README.md index 6157f09..b0a95c1 100644 --- a/README.md +++ b/README.md @@ -157,6 +157,8 @@ options: --web Hosts a webserver on http://127.0.0.1:8080 to view the results in browser --to-scan-list TO_SCAN_LIST The file to read the list of repositories to scan from. One repository per line, web URL to the repository. + --gl-config GL_CONFIG + The .toml rules file to use for Gitleaks. github/azuredevops: --org ORG Organisation name to target diff --git a/argparsing.py b/argparsing.py index a9119bb..fafa2c2 100644 --- a/argparsing.py +++ b/argparsing.py @@ -180,6 +180,12 @@ def error(self, message): help="The file to read the list of repositories to scan from. One repository per line, web URL to the repository.", ) +parser.add_argument( + "--gl-config", + type=str, + help="Path to toml file for custom rulesets for Gitleaks", +) + def parse_args(): args = parser.parse_args() @@ -201,4 +207,7 @@ def parse_args(): if "filesystem" == args.provider and (args.path is None): parser.error("filesystem requires --path") + + if args.gl_config is not None and args.disable_gitleaks: + parser.error("Gitleaks can't be disabled if passing a .toml file") return args diff --git a/features/helper.py b/features/helper.py index 1430808..bb8971a 100644 --- a/features/helper.py +++ b/features/helper.py @@ -385,6 +385,15 @@ def onerror(func, path, exc_info): func(path) +@when("we run secret-magpie-cli with a gitleaks {conf} file") +def step_impl(context, conf): + run_secret_magpie( + context, + engines="gitleaks", + args=[f"--gl-config={conf}"], + ) + + class LocalRepos: def __init__(self, rules, dir): # Prepare the directory for repositories diff --git a/features/secret_detection.feature b/features/secret_detection.feature index 1016976..3cb791a 100644 --- a/features/secret_detection.feature +++ b/features/secret_detection.feature @@ -130,11 +130,46 @@ Feature: Validate secret detection against various engines. When we run secret-magpie-cli in multi branch mode, to scan list repos.txt, https validation enabled, ignoring commits older than None, extra context disabled, secret storing enabled, output format csv and engines: all Then there will be 2 secrets detected - @azuredevops.PunkSecurity - Scenario: Validate that repo filtering works for AzureDevOps - Given we have a file called repos.txt with content + + @localrepos + @fixture.wantsAWSSecret + Scenario: Ensure that we only detect an AWS secret locally, when a matching rule is provided by toml file, and with only gitleaks enabled + Given we have a file called rules.toml with content """ - https://dev.azure.com/PunkSecurity/SecretMagpie-Testing/_git/ssh_key + [[rules]] + description = "AWS" + id = "aws-access-token" + regex = '''AKIAYVP4CIPPERUVIFXG''' + keywords = [ + "akia","agpa","aida","aroa","aipa","anpa","anva","asia", + ] """ - When we run secret-magpie-cli in multi branch mode, to scan list repos.txt, https validation enabled, ignoring commits older than None, extra context disabled, secret storing enabled, output format csv and engines: all - Then there will be 2 secrets detected + When we run secret-magpie-cli with a gitleaks rules.toml file + Then there will be 1 secrets detected + + @localrepos + @fixture.wantsAWSSecret + @fixture.wantsSSHKey + Scenario: Ensure that we only detect 1 AWS secret locally, and not the SSH key, when a matching rule is provided by toml file, and with only gitleaks enabled + Given we have a file called rules.toml with content + """ + [[rules]] + description = "AWS" + id = "aws-access-token" + regex = '''AKIAYVP4CIPPERUVIFXG''' + keywords = [ + "akia","agpa","aida","aroa","aipa","anpa","anva","asia", + ] + """ + When we run secret-magpie-cli with a gitleaks rules.toml file + Then there will be 1 secrets detected + + @localrepos + @fixture.wantsAWSSecret + Scenario: Ensure that we don't detect any secrets locally, when there are no matching rules provided by toml file, and with only gitleaks enabled + Given we have a file called rules.toml with content + """ + """ + When we run secret-magpie-cli with a gitleaks rules.toml file + Then there will be 0 secrets detected + diff --git a/features/validate_output.feature b/features/validate_output.feature index 4fef391..39e0a73 100644 --- a/features/validate_output.feature +++ b/features/validate_output.feature @@ -1,60 +1,68 @@ -Feature: Validate that the results files produced by secret-magpie-cli is of valid form and contains expected data. - @localrepos - @fixture.wantsSSHKey - Scenario Outline: Validate that the output is of valid form when a repo contains multi-line secrets - When we run secret-magpie-cli with output format and engines: all - Then the results file will be of valid form - - Examples: - | format | - | json | - | csv | - - @localrepos - @fixture.wantsAWSSecret - Scenario Outline: Ensure that the secrets column is blank when using format and we disable storing secrets - When we run secret-magpie-cli with secret storing disabled, output format and engines: all - Then the secret field within the output will be blank - - Examples: - | format | - | json | - | csv | - - @localrepos - Scenario: Ensure that when we run secret-magpie-cli with no engines enabled, we get the correct error - When we run secret-magpie-cli with engines: none - Then secret-magpie-cli's output will be - """ - ERROR: No tools to scan with - """ - - @github.secretmagpie-testing - Scenario: Ensure that we clean up repos that we've cloned when using a remote - When we run secret-magpie-cli with engines: all - Then directory 7c484be0 won't exist - And directory 42cbad53 won't exist - - @no-cleanup - @github.secretmagpie-testing - @rmtree.7c484be0 - @rmtree.42cbad53 - Scenario: Ensure that we clean up repos that we've cloned when using a remote - When we run secret-magpie-cli with engines: all - Then directory 7c484be0 will exist - And directory 42cbad53 will exist - - @localrepos - @wantsAWSSecret - Scenario: Ensure that the date field within the repo is parseable in ISO8601 format. - When we run secret-magpie-cli with engines: all - Then the date column of results.csv will be ISO8601 format - - @localrepos - @wantsAWSSecret - Scenario: Ensure that secret-magpie-cli gives the expected error when we run it with an invalid threshold date - When we run secret-magpie-cli in multi branch mode, ignoring commits older than invaliddate extra context disabled, secret storing enabled, output format csv and engines: all - Then secret-magpie-cli's output will be - """ - ERROR: Invalid ISO format string. - """ +Feature: Validate that the results files produced by secret-magpie-cli is of valid form and contains expected data. + @localrepos + @fixture.wantsSSHKey + Scenario Outline: Validate that the output is of valid form when a repo contains multi-line secrets + When we run secret-magpie-cli with output format and engines: all + Then the results file will be of valid form + + Examples: + | format | + | json | + | csv | + + @localrepos + @fixture.wantsAWSSecret + Scenario Outline: Ensure that the secrets column is blank when using format and we disable storing secrets + When we run secret-magpie-cli with secret storing disabled, output format and engines: all + Then the secret field within the output will be blank + + Examples: + | format | + | json | + | csv | + + @localrepos + Scenario: Ensure that when we run secret-magpie-cli with no engines enabled, we get the correct error + When we run secret-magpie-cli with engines: none + Then secret-magpie-cli's output will be + """ + ERROR: No tools to scan with + """ + + @github.secretmagpie-testing + Scenario: Ensure that we clean up repos that we've cloned when using a remote + When we run secret-magpie-cli with engines: all + Then directory 7c484be0 won't exist + And directory 42cbad53 won't exist + + @no-cleanup + @github.secretmagpie-testing + @rmtree.7c484be0 + @rmtree.42cbad53 + Scenario: Ensure that we clean up repos that we've cloned when using a remote + When we run secret-magpie-cli with engines: all + Then directory 7c484be0 will exist + And directory 42cbad53 will exist + + @localrepos + @wantsAWSSecret + Scenario: Ensure that the date field within the repo is parseable in ISO8601 format. + When we run secret-magpie-cli with engines: all + Then the date column of results.csv will be ISO8601 format + + @localrepos + @wantsAWSSecret + Scenario: Ensure that secret-magpie-cli gives the expected error when we run it with an invalid threshold date + When we run secret-magpie-cli in multi branch mode, ignoring commits older than invaliddate extra context disabled, secret storing enabled, output format csv and engines: all + Then secret-magpie-cli's output will be + """ + ERROR: Invalid ISO format string. + """ + + @localrepos + Scenario: Ensure that secret-magpie-cli gives the expected error when we provide an invalid gitleaks toml file + When we run secret-magpie-cli with a gitleaks rules_not_found.toml file + Then secret-magpie-cli's output will be + """ + ERROR: File at rules_not_found.toml not found. + """ diff --git a/main.py b/main.py index 9837851..e095f1c 100644 --- a/main.py +++ b/main.py @@ -25,6 +25,8 @@ args = argparsing.parse_args() cleanup = not (args.no_cleanup or "filesystem" == args.provider) + conf = {"gitleaks": {}} + if args.web: with open("template.html", "r", encoding="utf-8") as f: ag_grid_template = f.read() @@ -34,6 +36,14 @@ with open(args.to_scan_list, "r") as f: to_scan_list = f.read().split("\n") + if args.gl_config is not None: + try: + open(args.gl_config, "rb").close() + except FileNotFoundError: + print("ERROR: File at", args.gl_config, "not found.") + exit() + conf["gitleaks"]["config_file_path"] = args.gl_config + with open(os.devnull, "wb") as devnull: if args.update_ca_store: subprocess.call( # nosec subprocess_without_shell_equals_true start_process_with_partial_path @@ -65,6 +75,7 @@ f = partial( tasks.process_repo, functions=tool_list, + conf=conf, single_branch=args.single_branch, extra_context=args.extra_context, cleanup=cleanup, diff --git a/repos.py b/repos.py index cd04cbb..931b02f 100644 --- a/repos.py +++ b/repos.py @@ -1,72 +1,72 @@ -from git import Repo as GitRepo -from hashlib import sha256 - - -class RepoCredentials: - def __init__(self, password, username=""): - self.username = username - self.password = password - - def get_auth_string(self) -> str: - if self.username: - return f"{self.username}:{self.password}" - - return self.password - - -class Repo: - def __init__(self, clone_url, html_url, name, credentials: RepoCredentials) -> None: - self.credentials = credentials - self.html_url = html_url - self.name = name - self.clone_url = clone_url - - def clone_repo(self, validate_https=True): - path = sha256(self.clone_url.encode("utf-8")).hexdigest()[0:8] - if self.clone_url.lower()[0:8] != "https://": - raise Exception(f"clone url not in expected format: '{self.clone_url}'") - - target = f"https://{self.credentials.get_auth_string()}@{self.clone_url[8:]}" - if validate_https: - GitRepo.clone_from(target, path).remotes[0].fetch() - else: - GitRepo.clone_from(target, path, c="http.sslVerify=false").remotes[ - 0 - ].fetch() - return path - - def link_to_file(self, commit_hash, file_path, line_num): - raise NotImplementedError("This method must be overridden in child classes") - - -class GithubRepo(Repo): - def link_to_file(self, commit_hash, file_path, line_num) -> str: - return f"{self.html_url}/blob/{commit_hash}/{file_path}#L{line_num}" - - -class GitlabRepo(Repo): - def link_to_file(self, commit_hash, file_path, line_num) -> str: - return f"{self.html_url}/blob/{commit_hash}/{file_path}#L{line_num}" - - -class BitbucketRepo(Repo): - def link_to_file(self, commit_hash, file_path, line_num) -> str: - return f"{self.html_url}/src/{commit_hash}/{file_path}#lines-{line_num}" - - -class ADORepo(Repo): - def link_to_file(self, commit_hash, file_path, line_num): - return f"{self.html_url}/commit/{commit_hash}?path=%2F{file_path}" - - -class FilesystemRepo(Repo): - """Represents a repository that is already checked out in the local filesystem""" - - def __init__(self, clone_url): - super().__init__(clone_url, "", clone_url, None) - - def clone_repo(self, validate_https=False): - return self.clone_url - - def link_to_file(self, commit_hash, file_path, line_num): - return self.clone_url +from git import Repo as GitRepo +from hashlib import sha256 + + +class RepoCredentials: + def __init__(self, password, username=""): + self.username = username + self.password = password + + def get_auth_string(self) -> str: + if self.username: + return f"{self.username}:{self.password}" + + return self.password + + +class Repo: + def __init__(self, clone_url, html_url, name, credentials: RepoCredentials) -> None: + self.credentials = credentials + self.html_url = html_url + self.name = name + self.clone_url = clone_url + + def clone_repo(self, validate_https=True): + path = sha256(self.clone_url.encode("utf-8")).hexdigest()[0:8] + if self.clone_url.lower()[0:8] != "https://": + raise Exception(f"clone url not in expected format: '{self.clone_url}'") + + target = f"https://{self.credentials.get_auth_string()}@{self.clone_url[8:]}" + if validate_https: + GitRepo.clone_from(target, path).remotes[0].fetch() + else: + GitRepo.clone_from( + target, path, allow_unsafe_options=True, c="http.sslVerify=false" + ).remotes[0].fetch() + return path + + def link_to_file(self, commit_hash, file_path, line_num): + raise NotImplementedError("This method must be overridden in child classes") + + +class GithubRepo(Repo): + def link_to_file(self, commit_hash, file_path, line_num) -> str: + return f"{self.html_url}/blob/{commit_hash}/{file_path}#L{line_num}" + + +class GitlabRepo(Repo): + def link_to_file(self, commit_hash, file_path, line_num) -> str: + return f"{self.html_url}/blob/{commit_hash}/{file_path}#L{line_num}" + + +class BitbucketRepo(Repo): + def link_to_file(self, commit_hash, file_path, line_num) -> str: + return f"{self.html_url}/src/{commit_hash}/{file_path}#lines-{line_num}" + + +class ADORepo(Repo): + def link_to_file(self, commit_hash, file_path, line_num): + return f"{self.html_url}/commit/{commit_hash}?path=%2F{file_path}" + + +class FilesystemRepo(Repo): + """Represents a repository that is already checked out in the local filesystem""" + + def __init__(self, clone_url): + super().__init__(clone_url, "", clone_url, None) + + def clone_repo(self, validate_https=False): + return self.clone_url + + def link_to_file(self, commit_hash, file_path, line_num): + return self.clone_url diff --git a/tasks.py b/tasks.py index 55f1d3b..a7c6e85 100644 --- a/tasks.py +++ b/tasks.py @@ -94,6 +94,7 @@ def __repr__(self): def process_repo( repo, + conf, functions, single_branch=False, extra_context=False, @@ -130,7 +131,7 @@ def process_repo( repo, "SUCCESS", function.__name__, - function(path, repo, branch, extra_context), + function(path, repo, branch, extra_context, conf), ) ) except: @@ -233,6 +234,7 @@ def get_repos_from_ado(org, pat, dont_validate_https): f"https://dev.azure.com/{org}/_apis/projects", headers=headers, verify=not dont_validate_https, + timeout=60, ) if response.content == b"": @@ -245,6 +247,7 @@ def get_repos_from_ado(org, pat, dont_validate_https): f"https://dev.azure.com/{org}/{project}/_apis/git/repositories", headers=headers, verify=not dont_validate_https, + timeout=60, ) if response.content == b"": continue diff --git a/test-requirements.txt b/test-requirements.txt index 75e4007..cda6a36 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,6 +1 @@ behave==1.2.6 -gitpython==3.* -pygithub==1.* -python-gitlab==3.* -atlassian-python-api==3.* -requests==2.28.* diff --git a/tools.py b/tools.py index 9e5f6cd..19c80b7 100644 --- a/tools.py +++ b/tools.py @@ -4,7 +4,7 @@ from json import loads -def truffle_hog(path: str, repo, branch, extra_context): +def truffle_hog(path: str, repo, branch, extra_context, conf): target = "file://" + path.replace("\\", "/") truffle_hog = [ "trufflehog", @@ -29,10 +29,14 @@ def truffle_hog(path: str, repo, branch, extra_context): return ret -def gitleaks(path, repo, branch, extra_context): +def gitleaks(path, repo, branch, extra_context, conf): temp_path = f"{path}.out" gitleaks = ["gitleaks", "detect", "-s", path, "-r", temp_path] gitleaks.append(f"--log-opts={branch}") + + if "config_file_path" in conf["gitleaks"]: + gitleaks.append(f"--config={conf['gitleaks']['config_file_path']}") + result = run( # nosec B603 git branch has limited char set gitleaks, capture_output=True )