From aade338aaa5fe7a49427e3b7596f9ed45a3bae87 Mon Sep 17 00:00:00 2001 From: Trish Gillett-Kawamoto Date: Fri, 9 Aug 2024 13:09:20 -0600 Subject: [PATCH] fix: Tokens in config are no longer ignored when there are tokens in the environment, part of refactoring in preparation for app token refreshing (#284) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR continues the work started in https://github.com/MeltanoLabs/tap-github/pull/281, completing the major refactor. This is accomplished by creating PersonalTokenManager and AppTokenManager classes, moving logic to them, and adding tests. After this PR, I'll be able to easily implement refreshing for app tokens and other app token related features. In additional to the added unit tests, I have tested running an extractor on top of the proposed code, in a couple scenarios: with auth_token? | with additional_auth_tokens? | with personal tokens in env? | with app token? | # tokens that should be used | # tokens used in test run --|--|--|--|--|-- no | no | no | yes | 1 | 1 ✅ yes | yes (1) | no | yes | 3 | 3 ✅ no | yes (2) | no | yes | 3 | 3 ✅ I am not easily able to test the case of personal tokens being detected from the environment, just because changing the name of environment variables is not simple in my set up. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- tap_github/authenticator.py | 129 +++++++--- tap_github/tests/test_authenticator.py | 314 ++++++++++++++++++++++++- 2 files changed, 408 insertions(+), 35 deletions(-) diff --git a/tap_github/authenticator.py b/tap_github/authenticator.py index 13b20bde..8f5acf1e 100644 --- a/tap_github/authenticator.py +++ b/tap_github/authenticator.py @@ -3,10 +3,10 @@ import logging import time from copy import deepcopy -from datetime import datetime +from datetime import datetime, timedelta from os import environ from random import choice, shuffle -from typing import Any, Dict, List, Optional, Set +from typing import Any, Dict, List, Optional, Set, Tuple import jwt import requests @@ -15,7 +15,9 @@ class TokenManager: - """A class to store a token's attributes and state.""" + """A class to store a token's attributes and state. + This parent class should not be used directly, use a subclass instead. + """ DEFAULT_RATE_LIMIT = 5000 # The DEFAULT_RATE_LIMIT_BUFFER buffer serves two purposes: @@ -25,7 +27,7 @@ class TokenManager: def __init__( self, - token: str, + token: Optional[str], rate_limit_buffer: Optional[int] = None, logger: Optional[Any] = None, ): @@ -50,6 +52,9 @@ def update_rate_limit(self, response_headers: Any) -> None: def is_valid_token(self) -> bool: """Try making a request with the current token. If the request succeeds return True, else False.""" + if not self.token: + return False + try: response = requests.get( url="https://api.github.com/rate_limit", @@ -61,7 +66,7 @@ def is_valid_token(self) -> bool: return True except requests.exceptions.HTTPError: msg = ( - f"A token was dismissed. " + f"A token could not be validated. " f"{response.status_code} Client Error: " f"{str(response.content)} (Reason: {response.reason})" ) @@ -70,7 +75,7 @@ def is_valid_token(self) -> bool: return False def has_calls_remaining(self) -> bool: - """Check if token is valid. + """Check if a token has capacity to make more calls. Returns: True if the token is valid and has enough api calls remaining. @@ -85,6 +90,14 @@ def has_calls_remaining(self) -> bool: return True +class PersonalTokenManager(TokenManager): + """A class to store token rate limiting information.""" + + def __init__(self, token: str, rate_limit_buffer: Optional[int] = None, **kwargs): + """Init PersonalTokenRateLimit info.""" + super().__init__(token, rate_limit_buffer=rate_limit_buffer, **kwargs) + + def generate_jwt_token( github_app_id: str, github_private_key: str, @@ -110,7 +123,8 @@ def generate_app_access_token( github_app_id: str, github_private_key: str, github_installation_id: Optional[str] = None, -) -> str: +) -> Tuple[str, datetime]: + produced_at = datetime.now() jwt_token = generate_jwt_token(github_app_id, github_private_key) headers = {"Authorization": f"Bearer {jwt_token}"} @@ -135,14 +149,71 @@ def generate_app_access_token( if resp.status_code != 201: resp.raise_for_status() - return resp.json()["token"] + expires_at = produced_at + timedelta(hours=1) + return resp.json()["token"], expires_at + + +class AppTokenManager(TokenManager): + """A class to store an app token's attributes and state, and handle token refreshing""" + + DEFAULT_RATE_LIMIT = 15000 + DEFAULT_EXPIRY_BUFFER_MINS = 10 + + def __init__(self, env_key: str, rate_limit_buffer: Optional[int] = None, **kwargs): + if rate_limit_buffer is None: + rate_limit_buffer = self.DEFAULT_RATE_LIMIT_BUFFER + super().__init__(None, rate_limit_buffer=rate_limit_buffer, **kwargs) + + parts = env_key.split(";;") + self.github_app_id = parts[0] + self.github_private_key = (parts[1:2] or [""])[0].replace("\\n", "\n") + self.github_installation_id: Optional[str] = (parts[2:3] or [""])[0] + + self.token_expires_at: Optional[datetime] = None + self.claim_token() + + def claim_token(self): + """Updates the TokenManager's token and token_expires_at attributes. + + The outcome will be _either_ that self.token is updated to a newly claimed valid token and + self.token_expires_at is updated to the anticipated expiry time (erring on the side of an early estimate) + _or_ self.token and self.token_expires_at are both set to None. + """ + self.token = None + self.token_expires_at = None + + # Make sure we have the details we need + if not self.github_app_id or not self.github_private_key: + raise ValueError( + "GITHUB_APP_PRIVATE_KEY could not be parsed. The expected format is " + '":app_id:;;-----BEGIN RSA PRIVATE KEY-----\\n_YOUR_P_KEY_\\n-----END RSA PRIVATE KEY-----"' + ) + + self.token, self.token_expires_at = generate_app_access_token( + self.github_app_id, self.github_private_key, self.github_installation_id + ) + + # Check if the token isn't valid. If not, overwrite it with None + if not self.is_valid_token(): + if self.logger: + self.logger.warning( + "An app token was generated but could not be validated." + ) + self.token = None + self.token_expires_at = None class GitHubTokenAuthenticator(APIAuthenticatorBase): """Base class for offloading API auth.""" + @staticmethod + def get_env(): + return dict(environ) + def prepare_tokens(self) -> List[TokenManager]: - # Save GitHub tokens + """Prep GitHub tokens""" + + env_dict = self.get_env() rate_limit_buffer = self._config.get("rate_limit_buffer", None) personal_tokens: Set[str] = set() @@ -156,52 +227,42 @@ def prepare_tokens(self) -> List[TokenManager]: # Accept multiple tokens using environment variables GITHUB_TOKEN* env_tokens = { value - for key, value in environ.items() + for key, value in env_dict.items() if key.startswith("GITHUB_TOKEN") } if len(env_tokens) > 0: self.logger.info( f"Found {len(env_tokens)} 'GITHUB_TOKEN' environment variables for authentication." ) - personal_tokens = env_tokens + personal_tokens = personal_tokens.union(env_tokens) token_managers: List[TokenManager] = [] for token in personal_tokens: - token_manager = TokenManager( + token_manager = PersonalTokenManager( token, rate_limit_buffer=rate_limit_buffer, logger=self.logger ) if token_manager.is_valid_token(): token_managers.append(token_manager) + else: + logging.warn("A token was dismissed.") # Parse App level private key and generate a token - if "GITHUB_APP_PRIVATE_KEY" in environ.keys(): + if "GITHUB_APP_PRIVATE_KEY" in env_dict.keys(): # To simplify settings, we use a single env-key formatted as follows: # "{app_id};;{-----BEGIN RSA PRIVATE KEY-----\n_YOUR_PRIVATE_KEY_\n-----END RSA PRIVATE KEY-----}" - parts = environ["GITHUB_APP_PRIVATE_KEY"].split(";;") - github_app_id = parts[0] - github_private_key = (parts[1:2] or [""])[0].replace("\\n", "\n") - github_installation_id = (parts[2:3] or [""])[0] - - if not (github_private_key): - self.logger.warning( - "GITHUB_APP_PRIVATE_KEY could not be parsed. The expected format is " - '":app_id:;;-----BEGIN RSA PRIVATE KEY-----\n_YOUR_P_KEY_\n-----END RSA PRIVATE KEY-----"' + env_key = env_dict["GITHUB_APP_PRIVATE_KEY"] + try: + app_token_manager = AppTokenManager( + env_key, rate_limit_buffer=rate_limit_buffer, logger=self.logger ) - - else: - app_token = generate_app_access_token( - github_app_id, github_private_key, github_installation_id or None - ) - token_manager = TokenManager( - app_token, rate_limit_buffer=rate_limit_buffer, logger=self.logger + if app_token_manager.is_valid_token(): + token_managers.append(app_token_manager) + except ValueError as e: + self.logger.warn( + f"An error was thrown while preparing an app token: {e}" ) - if token_manager.is_valid_token(): - token_managers.append(token_manager) self.logger.info(f"Tap will run with {len(token_managers)} auth tokens") - - # Create a dict of TokenManager - # TODO - separate app_token and add logic to refresh the token using generate_app_access_token. return token_managers def __init__(self, stream: RESTStream) -> None: diff --git a/tap_github/tests/test_authenticator.py b/tap_github/tests/test_authenticator.py index c63853ca..68227815 100644 --- a/tap_github/tests/test_authenticator.py +++ b/tap_github/tests/test_authenticator.py @@ -1,10 +1,17 @@ +import re from datetime import datetime, timedelta from unittest.mock import MagicMock, patch import pytest import requests +from singer_sdk.streams import RESTStream -from tap_github.authenticator import TokenManager +from tap_github.authenticator import ( + AppTokenManager, + GitHubTokenAuthenticator, + PersonalTokenManager, + TokenManager, +) class TestTokenManager: @@ -112,3 +119,308 @@ def test_has_calls_remaining_fails_if_few_calls_remaining_and_reset_time_not_rea token_manager.update_rate_limit(mock_response_headers) assert not token_manager.has_calls_remaining() + + +class TestAppTokenManager: + + def test_initialization_with_3_part_env_key(self): + with patch.object(AppTokenManager, "claim_token", return_value=None): + token_manager = AppTokenManager("12345;;key\\ncontent;;67890") + assert token_manager.github_app_id == "12345" + assert token_manager.github_private_key == "key\ncontent" + assert token_manager.github_installation_id == "67890" + + def test_initialization_with_2_part_env_key(self): + with patch.object(AppTokenManager, "claim_token", return_value=None): + token_manager = AppTokenManager("12345;;key\\ncontent") + assert token_manager.github_app_id == "12345" + assert token_manager.github_private_key == "key\ncontent" + assert token_manager.github_installation_id == "" + + def test_initialization_with_malformed_env_key(self): + expected_error_expression = re.escape( + "GITHUB_APP_PRIVATE_KEY could not be parsed. The expected format is " + '":app_id:;;-----BEGIN RSA PRIVATE KEY-----\\n_YOUR_P_KEY_\\n-----END RSA PRIVATE KEY-----"' + ) + with pytest.raises(ValueError, match=expected_error_expression): + AppTokenManager("12345key\\ncontent") + + def test_generate_token_with_invalid_credentials(self): + with patch.object(AppTokenManager, "is_valid_token", return_value=False), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("some_token", MagicMock()), + ): + token_manager = AppTokenManager("12345;;key\\ncontent;;67890") + assert token_manager.token is None + assert token_manager.token_expires_at is None + + def test_successful_token_generation(self): + token_time = MagicMock() + with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("valid_token", token_time), + ): + token_manager = AppTokenManager("12345;;key\\ncontent;;67890") + token_manager.claim_token() + assert token_manager.token == "valid_token" + assert token_manager.token_expires_at == token_time + + +@pytest.fixture +def mock_stream(): + stream = MagicMock(spec=RESTStream) + stream.logger = MagicMock() + stream.tap_name = "tap_github" + stream.config = {"rate_limit_buffer": 5} + return stream + + +class TestGitHubTokenAuthenticator: + + def test_prepare_tokens_returns_empty_if_none_found(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, "get_env", return_value={"GITHUB_TLJKJFDS": "gt1"} + ), patch.object(PersonalTokenManager, "is_valid_token", return_value=True): + + auth = GitHubTokenAuthenticator(stream=mock_stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 0 + + def test_config_auth_token_only(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={"OTHER_TOKEN": "blah", "NOT_THE_RIGHT_TOKEN": "meh"}, + ), patch.object(PersonalTokenManager, "is_valid_token", return_value=True): + + stream = mock_stream + stream.config.update({"auth_token": "gt5"}) + auth = GitHubTokenAuthenticator(stream=stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 1 + assert token_managers[0].token == "gt5" + + def test_config_additional_auth_tokens_only(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={"OTHER_TOKEN": "blah", "NOT_THE_RIGHT_TOKEN": "meh"}, + ), patch.object(PersonalTokenManager, "is_valid_token", return_value=True): + + stream = mock_stream + stream.config.update({"additional_auth_tokens": ["gt7", "gt8", "gt9"]}) + auth = GitHubTokenAuthenticator(stream=stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 3 + assert sorted({tm.token for tm in token_managers}) == ["gt7", "gt8", "gt9"] + + def test_env_personal_tokens_only(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={ + "GITHUB_TOKEN1": "gt1", + "GITHUB_TOKENxyz": "gt2", + "OTHER_TOKEN": "blah", + }, + ), patch.object(PersonalTokenManager, "is_valid_token", return_value=True): + + auth = GitHubTokenAuthenticator(stream=mock_stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 2 + assert sorted({tm.token for tm in token_managers}) == ["gt1", "gt2"] + + def test_env_app_key_only(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={"GITHUB_APP_PRIVATE_KEY": "123;;key", "OTHER_TOKEN": "blah"}, + ), patch.object(AppTokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("installationtoken12345", MagicMock()), + ): + + auth = GitHubTokenAuthenticator(stream=mock_stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 1 + assert token_managers[0].token == "installationtoken12345" + + def test_all_token_types(self, mock_stream): + # Expectations: + # - the presence of additional_auth_tokens causes personal tokens in the environment to be ignored. + # - the other types all coexist + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={ + "GITHUB_TOKEN1": "gt1", + "GITHUB_TOKENxyz": "gt2", + "GITHUB_APP_PRIVATE_KEY": "123;;key;;install_id", + "OTHER_TOKEN": "blah", + }, + ), patch.object(TokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("installationtoken12345", MagicMock()), + ): + + stream = mock_stream + stream.config.update( + {"auth_token": "gt5", "additional_auth_tokens": ["gt7", "gt8", "gt9"]} + ) + auth = GitHubTokenAuthenticator(stream=stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 5 + assert sorted({tm.token for tm in token_managers}) == [ + "gt5", + "gt7", + "gt8", + "gt9", + "installationtoken12345", + ] + + def test_all_token_types_except_additional_auth_tokens(self, mock_stream): + # Expectations: + # - in the absence of additional_auth_tokens, all the other types can coexist + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={ + "GITHUB_TOKEN1": "gt1", + "GITHUB_TOKENxyz": "gt2", + "GITHUB_APP_PRIVATE_KEY": "123;;key;;install_id", + "OTHER_TOKEN": "blah", + }, + ), patch.object(TokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("installationtoken12345", MagicMock()), + ): + + stream = mock_stream + stream.config.update( + { + "auth_token": "gt5", + } + ) + auth = GitHubTokenAuthenticator(stream=stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 4 + assert sorted({tm.token for tm in token_managers}) == [ + "gt1", + "gt2", + "gt5", + "installationtoken12345", + ] + + def test_auth_token_and_additional_auth_tokens_deduped(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={ + "GITHUB_TOKEN1": "gt1", + "GITHUB_TOKENxyz": "gt2", + "OTHER_TOKEN": "blah", + }, + ), patch.object(TokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("installationtoken12345", MagicMock()), + ): + + stream = mock_stream + stream.config.update( + { + "auth_token": "gt1", + "additional_auth_tokens": ["gt1", "gt1", "gt8", "gt8", "gt9"], + } + ) + auth = GitHubTokenAuthenticator(stream=stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 3 + assert sorted({tm.token for tm in token_managers}) == ["gt1", "gt8", "gt9"] + + def test_auth_token_and_env_tokens_deduped(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={ + "GITHUB_TOKEN1": "gt1", + "GITHUB_TOKENa": "gt2", + "GITHUB_TOKENxyz": "gt2", + "OTHER_TOKEN": "blah", + }, + ), patch.object(TokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("installationtoken12345", MagicMock()), + ): + + stream = mock_stream + stream.config.update({"auth_token": "gt1"}) + auth = GitHubTokenAuthenticator(stream=stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 2 + assert sorted({tm.token for tm in token_managers}) == ["gt1", "gt2"] + + def test_handle_error_if_app_key_invalid(self, mock_stream): + # Confirm expected behaviour if an error is raised while setting up the app token manager: + # - don"t crash + # - print the error as a warning + # - continue with any other obtained tokens + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={"GITHUB_APP_PRIVATE_KEY": "123garbagekey"}, + ), patch("tap_github.authenticator.AppTokenManager") as mock_app_manager: + mock_app_manager.side_effect = ValueError("Invalid key format") + + auth = GitHubTokenAuthenticator(stream=mock_stream) + auth.prepare_tokens() + + mock_stream.logger.warn.assert_called_with( + "An error was thrown while preparing an app token: Invalid key format" + ) + + def test_exclude_generated_app_token_if_invalid(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={"GITHUB_APP_PRIVATE_KEY": "123;;key"}, + ), patch.object(AppTokenManager, "is_valid_token", return_value=False), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("installationtoken12345", MagicMock()), + ): + + auth = GitHubTokenAuthenticator(stream=mock_stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 0 + + def test_prepare_tokens_returns_empty_if_all_tokens_invalid(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={"GITHUB_TOKEN1": "gt1", "GITHUB_APP_PRIVATE_KEY": "123;;key"}, + ), patch.object( + PersonalTokenManager, "is_valid_token", return_value=False + ), patch.object( + AppTokenManager, "is_valid_token", return_value=False + ), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("installationtoken12345", MagicMock()), + ): + + stream = mock_stream + stream.config.update( + {"auth_token": "gt5", "additional_auth_tokens": ["gt7", "gt8", "gt9"]} + ) + auth = GitHubTokenAuthenticator(stream=stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 0