Skip to content

Commit

Permalink
Rework backfill task to create sub-tasks (#465)
Browse files Browse the repository at this point in the history
  • Loading branch information
adrian-codecov authored May 24, 2024
1 parent 4234112 commit 9cbe122
Show file tree
Hide file tree
Showing 6 changed files with 178 additions and 153 deletions.
2 changes: 2 additions & 0 deletions celery_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ def init_celery_tracing(*args, **kwargs):

# Backfill GH Apps
# Registered Celery task names for the GitHub App backfills. Each backfill has
# a parent task name and an "individual" task name; the parent tasks look the
# individual task up by this name and queue it once per record via apply_async.
backfill_existing_gh_app_installations_name = "app.tasks.backfill_existing_gh_app_installations.BackfillExistingGHAppInstallationsTask"
# Per-installation sub-task, queued with kwargs gh_app_installation_id=<id>.
backfill_existing_individual_gh_app_installation_name = "app.tasks.backfill_existing_individual_gh_app_installation.BackfillExistingIndividualGHAppInstallationTask"
backfill_owners_without_gh_app_installations_name = "app.tasks.backfill_owners_without_gh_app_installations.BackfillOwnersWithoutGHAppInstallationsTask"
# Per-owner sub-task, queued with kwargs ownerid=<id>.
backfill_owners_without_gh_app_installation_individual_name = "app.tasks.backfill_owners_without_gh_app_installation_individual.BackfillOwnersWithoutGHAppInstallationIndividualTask"

trial_expiration_task_name = "app.tasks.plan.TrialExpirationTask"
trial_expiration_cron_task_name = "app.cron.plan.TrialExpirationCronTask"
Expand Down
2 changes: 2 additions & 0 deletions tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
from tasks.backfill_commit_data_to_storage import backfill_commit_data_to_storage_task
from tasks.backfill_existing_gh_app_installations import (
backfill_existing_gh_app_installations_name,
backfill_existing_individual_gh_app_installation_name,
)
from tasks.backfill_owners_without_gh_app_installations import (
backfill_owners_without_gh_app_installation_individual_name,
backfill_owners_without_gh_app_installations_name,
)
from tasks.brolly_stats_rollup import brolly_stats_rollup_task
Expand Down
113 changes: 62 additions & 51 deletions tasks/backfill_existing_gh_app_installations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
from sqlalchemy.orm.session import Session

from app import celery_app
from celery_config import backfill_existing_gh_app_installations_name
from celery_config import (
backfill_existing_gh_app_installations_name,
backfill_existing_individual_gh_app_installation_name,
)
from database.models.core import GithubAppInstallation, Owner
from helpers.backfills import (
add_repos_service_ids_from_provider,
Expand All @@ -19,13 +22,19 @@
class BackfillExistingGHAppInstallationsTask(
BaseCodecovTask, name=backfill_existing_gh_app_installations_name
):
def backfill_existing_gh_apps(
def run_impl(
self,
db_session: Session,
owner_ids: List[int] = None,
missed_owner_ids=[],
owner_ids: Optional[List[int]] = None,
yield_amount: int = 1000,
*args,
**kwargs,
):
log.info(
"Starting Existing GH App backfill task",
)

# Backfill gh apps we already have
# Get owners that have installations, and installations queries
owners_query = (
db_session.query(Owner)
Expand All @@ -48,70 +57,72 @@ def backfill_existing_gh_apps(
)

for gh_app_installation in gh_app_installations:
# Check if gh app has 'all' repositories selected
owner = gh_app_installation.owner
ownerid = gh_app_installation.owner.ownerid
self.app.tasks[
backfill_existing_individual_gh_app_installation_name
].apply_async(kwargs=dict(gh_app_installation_id=gh_app_installation.id))

try:
owner_service = get_owner_provider_service(owner=owner)
is_selection_all = maybe_set_installation_to_all_repos(
db_session=db_session,
owner_service=owner_service,
gh_app_installation=gh_app_installation,
)
return {"successful": True, "reason": "backfill tasks queued"}


RegisteredBackfillExistingGHAppInstallationsTask = celery_app.register_task(
BackfillExistingGHAppInstallationsTask()
)
backfill_existing_gh_app_installations_task = celery_app.tasks[
RegisteredBackfillExistingGHAppInstallationsTask.name
]

if not is_selection_all:
# Find and add all repos the gh app has access to
add_repos_service_ids_from_provider(
db_session=db_session,
ownerid=ownerid,
owner_service=owner_service,
gh_app_installation=gh_app_installation,
)
log.info("Successful backfill", extra=dict(ownerid=ownerid))
except:
log.info(
"Backfill unsuccessful for this owner", extra=dict(ownerid=ownerid)
)
missed_owner_ids.append(ownerid)
continue
del gh_app_installations

class BackfillExistingIndividualGHAppInstallationTask(
BaseCodecovTask, name=backfill_existing_individual_gh_app_installation_name
):
def run_impl(
self,
db_session: Session,
owner_ids: Optional[List[int]] = None,
gh_app_installation_id: int,
*args,
**kwargs,
):
log.info(
"Starting Existing GH App backfill task",
gh_app_installation = db_session.query(GithubAppInstallation).get(
gh_app_installation_id
)

missed_owner_ids = []

# Backfill gh apps we already have
self.backfill_existing_gh_apps(
db_session=db_session,
owner_ids=owner_ids,
missed_owner_ids=missed_owner_ids,
)
# Check if gh app has 'all' repositories selected
owner = gh_app_installation.owner
ownerid = gh_app_installation.owner.ownerid

log.info(
"Backfill for existing gh apps completed",
"Attempt to backfill gh_app_installation",
extra=dict(owner_id=ownerid, parent_id=self.request.parent_id),
)

log.info(
"Potential owner ids that didn't backfill",
extra=dict(missed_owner_ids=missed_owner_ids),
)
try:
owner_service = get_owner_provider_service(owner=owner)
is_selection_all = maybe_set_installation_to_all_repos(
db_session=db_session,
owner_service=owner_service,
gh_app_installation=gh_app_installation,
)

return {"successful": True, "reason": "backfill task finished"}
if not is_selection_all:
# Find and add all repos the gh app has access to
add_repos_service_ids_from_provider(
db_session=db_session,
ownerid=ownerid,
owner_service=owner_service,
gh_app_installation=gh_app_installation,
)
log.info("Successful backfill", extra=dict(ownerid=ownerid))
return {"successful": True, "reason": "backfill task finished"}
except:
log.info(
"Backfill unsuccessful for this owner", extra=dict(ownerid=ownerid)
)
return {"successful": False, "reason": "backfill unsuccessful"}


RegisteredBackfillExistingGHAppInstallationsTask = celery_app.register_task(
BackfillExistingGHAppInstallationsTask()
RegisteredBackfillExistingIndividualGHAppInstallationTask = celery_app.register_task(
BackfillExistingIndividualGHAppInstallationTask()
)
backfill_existing_gh_app_installations_task = celery_app.tasks[
RegisteredBackfillExistingGHAppInstallationsTask.name
backfill_existing_individual_gh_app_installation_task = celery_app.tasks[
RegisteredBackfillExistingIndividualGHAppInstallationTask.name
]
111 changes: 94 additions & 17 deletions tasks/backfill_owners_without_gh_app_installations.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
from sqlalchemy.orm.session import Session

from app import celery_app
from celery_config import backfill_owners_without_gh_app_installations_name
from celery_config import (
backfill_owners_without_gh_app_installation_individual_name,
backfill_owners_without_gh_app_installations_name,
)
from database.models.core import (
GITHUB_APP_INSTALLATION_DEFAULT_NAME,
GithubAppInstallation,
Expand All @@ -28,7 +31,6 @@ def backfill_owners_with_integration_without_gh_app(
self,
db_session: Session,
owner_ids: List[int] = None,
missed_owner_ids=[],
yield_amount: int = 1000,
):
owners_with_integration_id_without_gh_app_query = (
Expand Down Expand Up @@ -92,39 +94,50 @@ def backfill_owners_with_integration_without_gh_app(
log.info(
"Backfill unsuccessful for this owner", extra=dict(ownerid=ownerid)
)
missed_owner_ids.append(ownerid)
continue

def run_impl(
self,
db_session: Session,
owner_ids: Optional[List[int]] = None,
yield_amount: int = 1000,
*args,
**kwargs,
):
log.info(
"Starting backfill for owners without gh app task",
)

missed_owner_ids = []

# Backfill owners with legacy integration + adding new gh app
self.backfill_owners_with_integration_without_gh_app(
db_session=db_session,
owner_ids=owner_ids,
missed_owner_ids=missed_owner_ids,
owners_with_integration_id_without_gh_app_query = (
db_session.query(Owner)
.outerjoin(
GithubAppInstallation,
Owner.ownerid == GithubAppInstallation.ownerid,
)
.filter(
GithubAppInstallation.ownerid == None,
Owner.integration_id.isnot(None),
Owner.service == "github",
)
)

log.info(
"Backfill for owners without apps finished",
)
if owner_ids:
owners_with_integration_id_without_gh_app_query = (
owners_with_integration_id_without_gh_app_query.filter(
Owner.ownerid.in_(owner_ids)
)
)

log.info(
"Potential owner ids that didn't backfill",
extra=dict(missed_owner_ids=missed_owner_ids),
owners: List[Owner] = owners_with_integration_id_without_gh_app_query.yield_per(
yield_amount
)

return {"successful": True, "reason": "backfill task finished"}
for owner in owners:
self.app.tasks[
backfill_owners_without_gh_app_installation_individual_name
].apply_async(kwargs=dict(ownerid=owner.ownerid))

return {"successful": True, "reason": "backfill tasks queued"}


RegisterOwnersWithoutGHAppInstallations = celery_app.register_task(
Expand All @@ -133,3 +146,67 @@ def run_impl(
backfill_owners_without_gh_app_installations = celery_app.tasks[
RegisterOwnersWithoutGHAppInstallations.name
]


class BackfillOwnersWithoutGHAppInstallationIndividual(
    BaseCodecovTask, name=backfill_owners_without_gh_app_installation_individual_name
):
    """Create and backfill a GitHub App installation for a single owner.

    This is the per-owner sub-task queued by the parent backfill task, which
    dispatches one of these (with ``ownerid`` as a keyword argument) for every
    owner it selects.
    """

    def run_impl(
        self,
        db_session: Session,
        ownerid: int,
        *args,
        **kwargs,
    ):
        """Backfill one owner and report whether it succeeded.

        Args:
            db_session: Session used to load the owner and to add the new
                ``GithubAppInstallation`` row.
            ownerid: Primary key of the ``Owner`` to backfill.

        Returns:
            ``{"successful": True, "reason": "backfill task finished"}`` on
            success, otherwise
            ``{"successful": False, "reason": "backfill unsuccessful"}``.
            Failures are reported through the return value rather than raised.
        """
        owner = db_session.query(Owner).get(ownerid)

        log.info(
            "Attempt to create GH App",
            extra=dict(owner_id=ownerid, parent_id=self.request.parent_id),
        )

        # ``Query.get`` returns None when no row matches. Report that case
        # explicitly instead of relying on an incidental error further down.
        if owner is None:
            log.info(
                "Backfill unsuccessful for this owner",
                extra=dict(ownerid=ownerid, reason="owner not found"),
            )
            return {"successful": False, "reason": "backfill unsuccessful"}

        try:
            owner_service = get_owner_provider_service(owner=owner)

            # Create new GH app installation and add all repos the gh app has access to
            log.info(
                "This owner has no Github App Installation",
                extra=dict(ownerid=ownerid),
            )
            gh_app_installation = GithubAppInstallation(
                owner=owner,
                installation_id=owner.integration_id,
                app_id=get_config("github", "integration", "id"),
                name=GITHUB_APP_INSTALLATION_DEFAULT_NAME,
            )
            db_session.add(gh_app_installation)

            is_selection_all = maybe_set_installation_to_all_repos(
                db_session=db_session,
                owner_service=owner_service,
                gh_app_installation=gh_app_installation,
            )

            if not is_selection_all:
                # Find and add all repos the gh app has access to
                add_repos_service_ids_from_provider(
                    db_session=db_session,
                    ownerid=ownerid,
                    owner_service=owner_service,
                    gh_app_installation=gh_app_installation,
                )
            log.info("Successful backfill", extra=dict(ownerid=ownerid))
            return {"successful": True, "reason": "backfill task finished"}
        except Exception:
            # Catch ``Exception`` rather than using a bare ``except`` so that
            # SystemExit / KeyboardInterrupt still propagate to the worker.
            # NOTE(review): the installation added to the session above is not
            # expunged here; confirm whether the caller commits it on failure.
            log.info(
                "Backfill unsuccessful for this owner",
                extra=dict(ownerid=ownerid),
                exc_info=True,
            )
            return {"successful": False, "reason": "backfill unsuccessful"}


# Register an instance of the per-owner backfill task with the Celery app.
RegisterOwnersWithoutGHAppInstallationIndividual = celery_app.register_task(
BackfillOwnersWithoutGHAppInstallationIndividual()
)
# Module-level handle to the task, looked up in the app's task registry by
# the registered task's name.
backfill_owners_without_gh_app_installation_individual = celery_app.tasks[
RegisterOwnersWithoutGHAppInstallationIndividual.name
]
20 changes: 13 additions & 7 deletions tasks/tests/unit/test_backfill_existing_gh_app_installations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
)
from database.tests.factories.core import OwnerFactory, RepositoryFactory
from tasks.backfill_existing_gh_app_installations import (
BackfillExistingGHAppInstallationsTask,
BackfillExistingIndividualGHAppInstallationTask,
)


Expand Down Expand Up @@ -51,8 +51,10 @@ def test_gh_app_with_selection_all(
return_value=mock_repo_provider,
)

task = BackfillExistingGHAppInstallationsTask()
assert task.run_impl(dbsession, owner_ids=None) == {
task = BackfillExistingIndividualGHAppInstallationTask()
assert task.run_impl(
dbsession, gh_app_installation_id=gh_app_installation.id
) == {
"successful": True,
"reason": "backfill task finished",
}
Expand Down Expand Up @@ -98,8 +100,10 @@ def test_gh_app_with_specific_owner_ids(
return_value=mock_repo_provider,
)

task = BackfillExistingGHAppInstallationsTask()
assert task.run_impl(dbsession, owner_ids=[owner.ownerid]) == {
task = BackfillExistingIndividualGHAppInstallationTask()
assert task.run_impl(
dbsession, gh_app_installation_id=gh_app_installation.id
) == {
"successful": True,
"reason": "backfill task finished",
}
Expand Down Expand Up @@ -165,8 +169,10 @@ def test_gh_app_without_all_repo_selection(
return_value=mock_repo_provider,
)

task = BackfillExistingGHAppInstallationsTask()
assert task.run_impl(dbsession, owner_ids=None) == {
task = BackfillExistingIndividualGHAppInstallationTask()
assert task.run_impl(
dbsession, gh_app_installation_id=gh_app_installation.id
) == {
"successful": True,
"reason": "backfill task finished",
}
Expand Down
Loading

0 comments on commit 9cbe122

Please sign in to comment.