Skip to content

Commit

Permalink
[MRG] Add bmc proxies (#231)
Browse files Browse the repository at this point in the history
* Add proxy settings.
* Add Estela Proxies.
* Add billing.
* Add migrations.
* Add Proxy Env Var.

---------

Co-authored-by: mgonnav <[email protected]>
  • Loading branch information
joaquingx and mgonnav authored Oct 18, 2023
1 parent 6b3e6d5 commit c234699
Show file tree
Hide file tree
Showing 39 changed files with 2,013 additions and 68 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,6 @@ __pycache__/
.DS_Store

# Certificates
*.crt
*.crt

bitmaker_billing/
13 changes: 11 additions & 2 deletions estela-api/api/serializers/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,13 @@ class SpiderJobUpdateSerializer(serializers.ModelSerializer):
SpiderJob.RUNNING_STATUS,
]

job_fields = ["lifespan", "total_response_bytes", "item_count", "request_count"]
job_fields = [
"lifespan",
"total_response_bytes",
"item_count",
"request_count",
"proxy_usage_data",
]

class Meta:
model = SpiderJob
Expand All @@ -155,6 +161,7 @@ class Meta:
"request_count",
"data_status",
"data_expiry_days",
"proxy_usage_data",
)

def update(self, instance, validated_data):
Expand Down Expand Up @@ -188,7 +195,9 @@ def update(self, instance, validated_data):
instance.status = status

for field in self.job_fields:
if not getattr(instance, field):
if not getattr(instance, field) or getattr(
instance, field
) != validated_data.get(field):
new_value = validated_data.get(field, getattr(instance, field))
setattr(instance, field, new_value)

Expand Down
23 changes: 23 additions & 0 deletions estela-api/api/serializers/proxyprovider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from rest_framework import serializers
from core.models import ProxyProvider
from api.serializers.job_specific import SpiderJobEnvVarSerializer


class ProxyProviderSerializer(serializers.ModelSerializer):
    """Serialize a ProxyProvider, exposing only its name, description and proxyid."""

    class Meta:
        model = ProxyProvider
        fields = ("name", "description", "proxyid")


class ProxyProviderUpdateSerializer(serializers.Serializer):
    """Request body for applying a proxy provider to a project or a spider.

    ``level`` selects which kind of object ``project_or_spider_id`` refers to.
    """

    # The only levels the proxy provider update endpoint knows how to handle.
    VALID_LEVELS = ("project", "spider")

    level = serializers.CharField(max_length=100, help_text="Spider or project")
    project_or_spider_id = serializers.CharField(
        max_length=100,
        help_text="Id of the project or spider where the update will be performed",
    )

    def validate_level(self, value):
        """Reject any level other than "project" or "spider".

        Failing here yields a 400 response instead of letting an unsupported
        level reach the view, where no target instance would be resolved.
        """
        if value not in self.VALID_LEVELS:
            raise serializers.ValidationError(
                f'"{value}" is not a valid level; expected "project" or "spider".'
            )
        return value


class ProxyProviderResponseSerializer(serializers.Serializer):
    """Response body of the proxy provider update endpoint.

    ``success`` reports the outcome; ``env_vars`` optionally lists the env vars
    of the project or spider that was updated.
    """

    success = serializers.BooleanField()
    env_vars = SpiderJobEnvVarSerializer(
        many=True,
        required=False,
        help_text="Env vars for the instance (project, spider)",
    )
5 changes: 5 additions & 0 deletions estela-api/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
job_data as job_data_views,
stats as stats_views,
notification as notification_views,
proxyprovider as proxyprovider_views,
)

router = routers.DefaultRouter(trailing_slash=False)
Expand Down Expand Up @@ -58,6 +59,10 @@
viewset=stats_views.SpidersJobsStatsViewSet,
basename="spider-stats",
)
# Proxy provider endpoints. Unlike the neighbouring registrations, no basename
# is passed here. NOTE(review): DRF is expected to derive it from the viewset's
# ``queryset`` attribute — confirm the resulting URL names are the intended ones.
router.register(
    prefix=r"proxy_provider",
    viewset=proxyprovider_views.ProxyProviderViewSet,
)
router.register(prefix=r"auth", viewset=auth_views.AuthAPIViewSet, basename="auth")
router.register(
prefix=r"auth/profile", viewset=auth_views.UserProfileViewSet, basename="profile"
Expand Down
47 changes: 42 additions & 5 deletions estela-api/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from api import errors
from api.exceptions import DataBaseError
from config.job_manager import spiderdata_db_client
from core.models import SpiderJobEnvVar
from core.models import SpiderJobEnvVar, ProxyProvider


def update_env_vars(instance, env_vars, level="project"):
def update_env_vars(instance, env_vars, level="project", delete=True):
env_vars_instance = instance.env_vars.all()
for env_var in env_vars:
if env_vars_instance.filter(**env_var).exists():
Expand All @@ -29,9 +29,10 @@ def update_env_vars(instance, env_vars, level="project"):
elif level == "spider":
SpiderJobEnvVar.objects.create(spider=instance, **env_var)

for env_var in env_vars_instance:
if env_var.name not in [value["name"] for value in env_vars]:
env_var.delete()
if delete:
for env_var in env_vars_instance:
if env_var.name not in [value["name"] for value in env_vars]:
env_var.delete()


def update_stats_from_redis(job, save_to_database=False):
Expand Down Expand Up @@ -72,3 +73,39 @@ def delete_stats_from_redis(job):
redis_conn.delete(f"scrapy_stats_{job.key}")
except:
pass


def get_proxy_provider_envs(proxy_id):
    """Return the env var dicts that describe the proxy provider ``proxy_id``.

    Each dict has the keys ``name``, ``value`` and ``masked``. One entry is
    produced for each of the provider's username, password, host, port and
    name attributes (as ``ESTELA_PROXY_USER``, ``_PASS``, ``_URL``, ``_PORT``
    and ``_NAME``), followed by a final ``ESTELA_PROXIES_ENABLED`` entry.
    Every entry except the provider name and the enabled flag is masked.

    Raises whatever ``ProxyProvider.objects.get`` raises when no provider has
    the given primary key.
    """
    provider = ProxyProvider.objects.get(pk=proxy_id)

    # Provider attribute -> suffix of the ESTELA_PROXY_* variable it feeds.
    suffix_by_attr = {
        "username": "USER",
        "password": "PASS",
        "host": "URL",
        "port": "PORT",
        "name": "NAME",
    }

    # Iterate the instance's attributes (not the mapping) so the entries keep
    # the order in which the attributes are stored on the instance.
    env_vars = [
        {
            "name": f"ESTELA_PROXY_{suffix_by_attr[attr]}",
            "value": attr_value,
            "masked": attr != "name",
        }
        for attr, attr_value in vars(provider).items()
        if attr in suffix_by_attr
    ]
    env_vars.append(
        {"name": "ESTELA_PROXIES_ENABLED", "value": "True", "masked": False}
    )
    return env_vars
23 changes: 21 additions & 2 deletions estela-api/api/views/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
SpiderJobSerializer,
SpiderJobUpdateSerializer,
)
from api.utils import update_stats_from_redis
from api.utils import update_stats_from_redis, get_proxy_provider_envs
from config.job_manager import job_manager
from core.models import DataStatus, Project, Spider, SpiderJob
from core.models import DataStatus, Project, Spider, SpiderJob, ProxyProvider


class SpiderJobViewSet(
Expand Down Expand Up @@ -120,6 +120,25 @@ def create(self, request, *args, **kwargs):
job_env_vars = {
env_var.name: env_var.value for env_var in job.env_vars.all()
}

proxy_provider_names = [
(proxy.name, proxy.proxyid) for proxy in ProxyProvider.objects.all()
]
proxy_name = job_env_vars.get("ESTELA_PROXY_NAME")

if proxy_name:
proxy_id = next(
(tup[1] for tup in proxy_provider_names if proxy_name in tup), None
)
if proxy_id:
proxy_env_vars = get_proxy_provider_envs(proxy_id)
job_env_vars.update(
{
env_var["name"]: env_var["value"]
for env_var in proxy_env_vars
}
)

token = request.auth.key if request.auth else None
job_manager.create_job(
job.name,
Expand Down
85 changes: 85 additions & 0 deletions estela-api/api/views/proxyprovider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from rest_framework import viewsets, status
from rest_framework.response import Response
from rest_framework import serializers
from core.models import ProxyProvider, Project, Spider, SpiderJobEnvVar, SpiderJob
from api.serializers.proxyprovider import (
ProxyProviderUpdateSerializer,
ProxyProviderSerializer,
ProxyProviderResponseSerializer,
)
from api.serializers.job_specific import SpiderJobEnvVarSerializer
from api.mixins import BaseViewSet, ActionHandlerMixin
from drf_yasg.utils import swagger_auto_schema
from api.utils import update_env_vars

# from utils import update_env_vars


class ProxyProviderViewSet(BaseViewSet, viewsets.ModelViewSet, ActionHandlerMixin):
    """Endpoints for proxy providers.

    ``update`` is overridden: rather than modifying the provider itself, it
    copies the provider's connection settings into the env vars of a project
    or a spider.
    """

    queryset = ProxyProvider.objects.all()
    serializer_class = ProxyProviderSerializer

    # Supported ``level`` -> (model owning the env vars, SpiderJobEnvVar lookup
    # used to read those env vars back).
    _LEVEL_TARGETS = {
        "project": (Project, "project_id"),
        "spider": (Spider, "spider_id"),
    }

    @swagger_auto_schema(
        request_body=ProxyProviderUpdateSerializer,  # Serializer describing the request body
        responses={
            status.HTTP_200_OK: ProxyProviderResponseSerializer()
        },  # Documented responses
    )
    def update(self, request, *args, **kwargs):
        """Apply this proxy provider's settings to a project or a spider.

        The request body must give ``level`` ("project" or "spider") and
        ``project_or_spider_id``. The provider's username, password, host,
        port and name are written to the target as ``ESTELA_PROXY_*`` env vars
        (existing unrelated env vars are kept), and the target's resulting env
        vars are returned.

        Responds with 400 when the body is invalid, the level is not
        supported, or no project/spider has the given id.
        """
        serializer = ProxyProviderUpdateSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        level = serializer.validated_data["level"]
        target_id = serializer.validated_data["project_or_spider_id"]

        # Without this guard an unsupported level would leave the target
        # unresolved and fail later with an UnboundLocalError (HTTP 500).
        if level not in self._LEVEL_TARGETS:
            raise serializers.ValidationError(
                {"level": 'Must be "project" or "spider".'}
            )
        # The "spider" level targets a Spider (not a SpiderJob): the env vars
        # are created with ``spider=instance`` and read back via ``spider_id``.
        model, env_var_lookup = self._LEVEL_TARGETS[level]
        try:
            instance = model.objects.get(pk=target_id)
        except model.DoesNotExist:
            raise serializers.ValidationError(
                {"project_or_spider_id": f"No {level} found with id {target_id}."}
            ) from None

        proxy_provider = self.get_object()
        proxy_attrs = [
            "username",
            "password",
            "host",
            "port",
            "name",
        ]
        # Attribute names whose env var suffix differs from the attribute name.
        replaces = {
            "password": "pass",
            "host": "url",
            "username": "user",
        }
        env_vars = []
        for field, value in vars(proxy_provider).items():
            if field not in proxy_attrs:
                continue
            name = replaces.get(field, field).upper()
            env_vars.append(
                {
                    "name": f"ESTELA_PROXY_{name}",
                    "value": value,
                    # Everything but the provider name is treated as sensitive.
                    "masked": name != "NAME",
                }
            )
        # delete=False keeps the target's env vars that are not proxy related.
        update_env_vars(instance, env_vars, level=level, delete=False)

        env_vars_instance = SpiderJobEnvVar.objects.filter(
            **{env_var_lookup: target_id}
        )
        env_vars_serialized = SpiderJobEnvVarSerializer(
            env_vars_instance, required=False, many=True
        )
        resp_serializer = ProxyProviderResponseSerializer(
            data={"success": True, "env_vars": env_vars_serialized.data}
        )
        resp_serializer.is_valid()
        return Response(resp_serializer.data, status=status.HTTP_200_OK)
11 changes: 11 additions & 0 deletions estela-api/config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
EMAIL_HOST=(str, "dummy"),
EMAIL_PORT=(int, "dummy"),
VERIFICATION_EMAIL=(str, "dummy"),
RESERVED_PROXY_NAMES=(str, ""),
)
environ.Env.read_env(env_file=".env")

Expand Down Expand Up @@ -298,3 +299,13 @@

# Verification Email
VERIFICATION_EMAIL = env("VERIFICATION_EMAIL")

# Proxy settings (optional)
# The RESERVED_PROXY_NAMES env var is a comma-separated list of names, e.g.
# "foo_bar,baz". Each name becomes a (title-cased label, "<name>_usage") pair,
# e.g. ("Foo Bar", "foo_bar_usage"). Whitespace around names and empty entries
# (such as the one left by a trailing comma) are ignored, so an unset or empty
# variable yields an empty list.
RESERVED_PROXY_NAMES = [
    (name.replace("_", " ").title(), f"{name}_usage")
    for name in (raw.strip() for raw in env("RESERVED_PROXY_NAMES").split(","))
    if name
]
15 changes: 13 additions & 2 deletions estela-api/core/admin.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,26 @@
from django.contrib import admin

from core.models import (
Project,
ProxyProvider,
Spider,
SpiderJob,
SpiderJobArg,
SpiderJobEnvVar,
UsageRecord,
Permission,
)
from django.contrib import admin


# Registers Project in the Django admin with the default ModelAdmin options.
@admin.register(Project)
class ProjectAdmin(admin.ModelAdmin):
    pass


# Registers Permission in the Django admin with the default ModelAdmin options.
@admin.register(Permission)
class PermissionAdmin(admin.ModelAdmin):
    pass


# Registers Spider in the Django admin with the default ModelAdmin options.
@admin.register(Spider)
class SpiderAdmin(admin.ModelAdmin):
    pass
Expand All @@ -39,3 +45,8 @@ class SpiderJobAdmin(admin.ModelAdmin):
# Registers UsageRecord in the Django admin with the default ModelAdmin options.
@admin.register(UsageRecord)
class UsageRecordAdmin(admin.ModelAdmin):
    pass


# Registers ProxyProvider in the Django admin with the default ModelAdmin options.
@admin.register(ProxyProvider)
class ProxyProviderAdmin(admin.ModelAdmin):
    pass
Loading

0 comments on commit c234699

Please sign in to comment.