From c7cf6845c0e30ad9ab3e6c1c095330543a1e04f4 Mon Sep 17 00:00:00 2001 From: Matt Hammerly Date: Tue, 11 Jul 2023 18:32:48 -0700 Subject: [PATCH 1/4] sync_repos: add new repos to db when sync_repos=True instead of only updating existing --- requirements.in | 6 +- tasks/sync_repos.py | 51 +++++++++++++--- tasks/tests/unit/test_sync_repos_task.py | 77 ++++++++++++++---------- 3 files changed, 89 insertions(+), 45 deletions(-) diff --git a/requirements.in b/requirements.in index 1339c8dc6..5a64ab458 100644 --- a/requirements.in +++ b/requirements.in @@ -1,11 +1,11 @@ -git+ssh://git@github.com/codecov/shared.git@30212ca03f4cc53e1dd64434b7f77cec2e300e3b#egg=shared +git+ssh://git@github.com/codecov/shared.git@13805b0c52438fb7316d8d1d34ea25c9113cf375#egg=shared git+ssh://git@github.com/codecov/opentelem-python.git@v0.0.4a1#egg=codecovopentelem boto3 celery click coverage factory-boy -google-cloud-storage +google-cloud-storage>=2.10.0 httpx analytics-python==1.3.0b1 lxml>=4.9.1 @@ -32,4 +32,4 @@ stripe timestring vcrpy opentelemetry-instrumentation-celery -opentelemetry-sdk \ No newline at end of file +opentelemetry-sdk diff --git a/tasks/sync_repos.py b/tasks/sync_repos.py index e3e1325f9..8033faab7 100644 --- a/tasks/sync_repos.py +++ b/tasks/sync_repos.py @@ -5,6 +5,7 @@ from redis.exceptions import LockError from shared.celery_config import sync_repos_task_name from shared.torngit.exceptions import TorngitClientError +from sqlalchemy import and_ from app import celery_app from database.models import Owner, Repository @@ -77,17 +78,47 @@ async def run_async( log.warning("Unable to sync repos because another task is already doing it") async def sync_repos_using_integration(self, db_session, git, ownerid, username): - repo_service_ids = await git.list_repos_using_installation(username) - if repo_service_ids: - repo_service_ids = list(map(str, repo_service_ids)) - if repo_service_ids: - db_session.query(Repository).filter( - Repository.ownerid == ownerid, - Repository.service_id.in_(repo_service_ids), - Repository.using_integration.isnot(True), - ).update( - {Repository.using_integration: True}, synchronize_session=False + repos = await git.list_repos_using_installation(username) + if repos: + service_ids = {repo["id"] for repo in repos} + if service_ids: + # Querying through the `Repository` model is cleaner, but we + # need to go through the table object instead if we want to + # use a Postgres `RETURNING` clause like this. + table = Repository.__table__ + update_statement = ( + table.update() + .returning(table.columns.service_id) + .where( + and_( + table.columns.ownerid == ownerid, + table.columns.service_id.in_(service_ids), + ) + ) + .values(using_integration=True) ) + result = db_session.execute(update_statement) + updated_service_ids = {r[0] for r in result.fetchall()} + + # The set of repos our app can see minus the set of repos we + # just updated = the set of repos we need to insert. + missing_service_ids = service_ids - updated_service_ids + missing_repos = [ + repo for repo in repos if repo["id"] in missing_service_ids + ] + + for repo in missing_repos: + new_repo = Repository( + ownerid=ownerid, + service_id=repo["id"], + name=repo["name"], + language=repo["language"], + private=repo["private"], + branch=repo["default_branch"], + using_integration=True, + ) + db_session.add(new_repo) + db_session.flush() else: db_session.query(Repository).filter( Repository.ownerid == ownerid, Repository.using_integration.is_(True) diff --git a/tasks/tests/unit/test_sync_repos_task.py b/tasks/tests/unit/test_sync_repos_task.py index e7c473cad..aa4e50cdd 100644 --- a/tasks/tests/unit/test_sync_repos_task.py +++ b/tasks/tests/unit/test_sync_repos_task.py @@ -482,7 +482,10 @@ async def test_only_public_repos_not_in_db( @pytest.mark.asyncio async def test_sync_repos_using_integration( - self, mocker, mock_configuration, dbsession, codecov_vcr, mock_redis + self, + mocker, + dbsession, + mock_owner_provider, ): token = "ecd73a086eadc85db68747a66bdbd662a785a072" user = OwnerFactory.create( @@ -495,48 +498,58 @@ async def test_sync_repos_using_integration( ) dbsession.add(user) - repo_pytest = RepositoryFactory.create( - private=False, - name="pytest", - using_integration=False, - service_id="159089634", - owner=user, - ) - repo_spack = RepositoryFactory.create( - private=False, - name="spack", - using_integration=True, - service_id="164948070", - owner=user, - ) - repo_pub = RepositoryFactory.create( - private=False, - name="pub", - using_integration=None, - service_id="213786132", - owner=user, - ) - dbsession.add(repo_pytest) - dbsession.add(repo_spack) - dbsession.add(repo_pub) + def repo_obj(service_id, name, language, private, branch, using_integration): + return { + "id": service_id, + "name": name, + "language": language, + "private": private, + "default_branch": branch, + "using_integration": using_integration, + } + + mock_repos = [ + repo_obj("159089634", "pytest", "python", False, "main", True), + repo_obj("164948070", "spack", "python", False, "develop", False), + repo_obj("213786132", "pub", "dart", False, "master", None), + repo_obj("555555555", "soda", "python", False, "main", None), + ] + + # Mock GitHub response for repos that are visible to our app + mock_owner_provider.list_repos_using_installation.return_value = mock_repos + + # Three of the four repositories we can see are already in the database. + # Will we update `using_integration` correctly? + preseeded_repos = [] + for repo in mock_repos[:-1]: + preseeded_repos.append( + RepositoryFactory.create( + private=repo["private"], + name=repo["name"], + using_integration=repo["using_integration"], + service_id=repo["id"], + owner=user, + ) + ) + + for repo in preseeded_repos: + dbsession.add(repo) dbsession.flush() await SyncReposTask().run_async( dbsession, ownerid=user.ownerid, using_integration=True ) - dbsession.commit() repos = ( dbsession.query(Repository) - .filter( - Repository.service_id.in_( - (repo_pytest.service_id, repo_spack.service_id, repo_pub.service_id) - ) - ) + .filter(Repository.service_id.in_((repo["id"] for repo in mock_repos))) .all() ) - assert len(repos) == 3 + + # We pre-seeded 3 repos in the database, but we should have added the + # 4th based on our GitHub response + assert len(repos) == 4 assert user.permission == [] # there were no private repos for repo in repos: From 27fa8b1cf5d00c5d3d77684fefcc394ab188bfbd Mon Sep 17 00:00:00 2001 From: Matt Hammerly Date: Mon, 17 Jul 2023 15:02:32 -0700 Subject: [PATCH 2/4] update shared ref to corresponding sanitized ref, run pip-compile --- requirements.in | 2 +- requirements.txt | 33 ++++++++++++++------------------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/requirements.in b/requirements.in index 5a64ab458..9c9c004c5 100644 --- a/requirements.in +++ b/requirements.in @@ -1,4 +1,4 @@ -git+ssh://git@github.com/codecov/shared.git@13805b0c52438fb7316d8d1d34ea25c9113cf375#egg=shared +git+ssh://git@github.com/codecov/shared.git@bc9933c6d61555ab36f310103feb6edd381d928f#egg=shared git+ssh://git@github.com/codecov/opentelem-python.git@v0.0.4a1#egg=codecovopentelem boto3 celery diff --git a/requirements.txt b/requirements.txt index 08b1e349b..3ff9cbd32 100644 --- a/requirements.txt +++ b/requirements.txt @@ -77,32 +77,33 @@ deprecated==1.2.12 # via opentelemetry-api ecdsa==0.16.1 # via tlslite-ng -exceptiongroup==1.0.0 - # via pytest factory-boy==3.2.0 # via -r requirements.in faker==8.8.2 # via factory-boy freezegun==1.2.2 # via pytest-freezegun -google-api-core==1.26.1 - # via google-cloud-core -google-auth==1.27.1 +google-api-core==2.11.1 + # via + # google-cloud-core + # google-cloud-storage +google-auth==2.21.0 # via # google-api-core # google-cloud-core # google-cloud-storage -google-cloud-core==1.6.0 + # shared +google-cloud-core==2.3.3 # via google-cloud-storage -google-cloud-storage==1.36.2 +google-cloud-storage==2.10.0 # via # -r requirements.in # shared google-crc32c==1.1.2 # via google-resumable-media -google-resumable-media==1.2.0 +google-resumable-media==2.5.0 # via google-cloud-storage -googleapis-common-protos==1.53.0 +googleapis-common-protos==1.59.1 # via google-api-core h11==0.12.0 # via httpcore @@ -162,9 +163,7 @@ opentelemetry-semantic-conventions==0.23b2 # opentelemetry-instrumentation-celery # opentelemetry-sdk packaging==20.9 - # via - # google-api-core - # pytest + # via pytest pluggy==0.13.1 # via pytest prompt-toolkit==3.0.28 @@ -172,6 +171,7 @@ prompt-toolkit==3.0.28 protobuf==3.20.3 # via # google-api-core + # googleapis-common-protos # shared psycopg2==2.9.3 # via -r requirements.in @@ -221,7 +221,6 @@ python-json-logger==0.1.11 pytz==2022.1 # via # celery - # google-api-core # timestring pyyaml==6.0 # via @@ -247,17 +246,14 @@ s3transfer==0.3.4 # via boto3 sentry-sdk==1.19.1 # via -r requirements.in -shared @ git+ssh://git@github.com/codecov/shared.git@30212ca03f4cc53e1dd64434b7f77cec2e300e3b +shared @ git+ssh://git@github.com/codecov/shared.git@bc9933c6d61555ab36f310103feb6edd381d928f # via -r requirements.in six==1.15.0 # via # analytics-python # click-repl # ecdsa - # google-api-core # google-auth - # google-cloud-core - # google-resumable-media # python-dateutil # sqlalchemy-utils # vcrpy @@ -287,8 +283,6 @@ timestring==1.6.4 # via -r requirements.in tlslite-ng==0.8.0-alpha39 # via shared -tomli==2.0.1 - # via pytest typing==3.7.4.3 # via shared typing-extensions==4.6.3 @@ -296,6 +290,7 @@ typing-extensions==4.6.3 urllib3==1.26.13 # via # botocore + # google-auth # minio # requests # sentry-sdk From 4cb2019d0006673b01275ba2bc055ff9ca5894fd Mon Sep 17 00:00:00 2001 From: Matt Hammerly Date: Tue, 18 Jul 2023 11:40:01 -0700 Subject: [PATCH 3/4] bump pyyaml to 6.0.1 to fix build --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3ff9cbd32..1b5bfc132 100644 --- a/requirements.txt +++ b/requirements.txt @@ -222,7 +222,7 @@ pytz==2022.1 # via # celery # timestring -pyyaml==6.0 +pyyaml==6.0.1 # via # -r requirements.in # vcrpy From 38a539ff284c82a8929d25571690f62fffd53054 Mon Sep 17 00:00:00 2001 From: Matt Hammerly Date: Tue, 18 Jul 2023 11:57:50 -0700 Subject: [PATCH 4/4] add mock redis to unit test --- tasks/tests/unit/test_sync_repos_task.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tasks/tests/unit/test_sync_repos_task.py b/tasks/tests/unit/test_sync_repos_task.py index aa4e50cdd..97e678999 100644 --- a/tasks/tests/unit/test_sync_repos_task.py +++ b/tasks/tests/unit/test_sync_repos_task.py @@ -486,6 +486,7 @@ async def test_sync_repos_using_integration( mocker, dbsession, mock_owner_provider, + mock_redis, ): token = "ecd73a086eadc85db68747a66bdbd662a785a072" user = OwnerFactory.create(