Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(fix)API: Correctly pass scheduled job proxy env vars when launching jobs #237

Merged
merged 1 commit into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions estela-api/api/utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from datetime import timedelta

import redis
from django.conf import settings
import redis

from api import errors
from api.exceptions import DataBaseError
from config.job_manager import spiderdata_db_client
from core.models import SpiderJobEnvVar, ProxyProvider
from core.models import SpiderJobEnvVar


def update_env_vars(instance, env_vars, level="project", delete=True):
Expand Down Expand Up @@ -75,8 +75,7 @@ def delete_stats_from_redis(job):
pass


def get_proxy_provider_envs(proxy_id):
proxy_provider = ProxyProvider.objects.get(pk=proxy_id)
def get_proxy_provider_envs(proxy_provider):
proxy_attrs = [
"username",
"password",
Expand Down
18 changes: 6 additions & 12 deletions estela-api/api/views/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
from rest_framework.response import Response

from api.filters import SpiderJobFilter
from api.mixins import BaseViewSet, ActionHandlerMixin
from api.mixins import ActionHandlerMixin, BaseViewSet
from api.serializers.job import (
SpiderJobCreateSerializer,
SpiderJobSerializer,
SpiderJobUpdateSerializer,
)
from api.utils import update_stats_from_redis, get_proxy_provider_envs
from api.utils import get_proxy_provider_envs, update_stats_from_redis
from config.job_manager import job_manager
from core.models import DataStatus, Project, Spider, SpiderJob, ProxyProvider
from core.models import DataStatus, Project, ProxyProvider, Spider, SpiderJob


class SpiderJobViewSet(
Expand Down Expand Up @@ -121,17 +121,11 @@ def create(self, request, *args, **kwargs):
env_var.name: env_var.value for env_var in job.env_vars.all()
}

proxy_provider_names = [
(proxy.name, proxy.proxyid) for proxy in ProxyProvider.objects.all()
]
proxy_name = job_env_vars.get("ESTELA_PROXY_NAME")

if proxy_name:
proxy_id = next(
(tup[1] for tup in proxy_provider_names if proxy_name in tup), None
)
if proxy_id:
proxy_env_vars = get_proxy_provider_envs(proxy_id)
proxy_provider = ProxyProvider.objects.filter(name=proxy_name).first()
if proxy_provider:
proxy_env_vars = get_proxy_provider_envs(proxy_provider)
job_env_vars.update(
{
env_var["name"]: env_var["value"]
Expand Down
25 changes: 23 additions & 2 deletions estela-api/core/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,21 @@
from rest_framework.authtoken.models import Token

from api.serializers.job import SpiderJobCreateSerializer
from api.utils import delete_stats_from_redis, update_stats_from_redis
from api.utils import (
delete_stats_from_redis,
get_proxy_provider_envs,
update_stats_from_redis,
)
from config.celery import app as celery_app
from config.job_manager import job_manager, spiderdata_db_client
from core.models import DataStatus, Project, Spider, SpiderJob, UsageRecord
from core.models import (
DataStatus,
Project,
ProxyProvider,
Spider,
SpiderJob,
UsageRecord,
)


def get_default_token(job):
Expand Down Expand Up @@ -86,6 +97,16 @@ def launch_job(sid_, data_, data_expiry_days=None, token=None):

job_args = {arg.name: arg.value for arg in job.args.all()}
job_env_vars = {env_var.name: env_var.value for env_var in job.env_vars.all()}

proxy_name = job_env_vars.get("ESTELA_PROXY_NAME")
if proxy_name:
proxy_provider = ProxyProvider.objects.filter(name=proxy_name).first()
if proxy_provider:
proxy_env_vars = get_proxy_provider_envs(proxy_provider)
job_env_vars.update(
{env_var["name"]: env_var["value"] for env_var in proxy_env_vars}
)

job_manager.create_job(
job.name,
job.key,
Expand Down
Loading