Skip to content

Commit

Permalink
(enh)estela-api: Improve data deletion (#238)
Browse files: browse the repository at this point in the history
* (enh)db_adapters: Directly drop collection when deleting data from MongoDB
* (enh)estela-api: Refactor data delete code and update responses
  • Loading branch information
mgonnav authored Jan 9, 2024
1 parent 26cd034 commit 702bdd6
Show file tree
Hide file tree
Showing 13 changed files with 49 additions and 118 deletions.
6 changes: 3 additions & 3 deletions database_adapters/db_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pymongo
from bson.objectid import ObjectId
from pymongo.errors import ConnectionFailure
from pymongo.errors import ConnectionFailure, PyMongoError


class InsertionResponse:
Expand Down Expand Up @@ -87,12 +87,12 @@ def get_connection(self):

def delete_collection_data(self, database_name, collection_name):
collection = self.client[database_name][collection_name]
count = collection.delete_many({}).deleted_count
try:
collection.drop()
return True
except PyMongoError as ex:
print(ex)
return count
return False

def get_collection_data(self, database_name, collection_name, limit=10000):
collection = self.client[database_name][collection_name]
Expand Down
4 changes: 0 additions & 4 deletions estela-api/api/serializers/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,6 @@ def update(self, instance, validated_data):
return instance


class DeleteJobDataSerializer(serializers.Serializer):
count = serializers.IntegerField(required=True, help_text="Deleted items count.")


class ProjectJobSerializer(serializers.Serializer):
results = SpiderJobSerializer(many=True, required=True, help_text="Project jobs.")
count = serializers.IntegerField(required=True, help_text="Project jobs count.")
34 changes: 17 additions & 17 deletions estela-api/api/views/job_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from api import errors
from api.exceptions import DataBaseError
from api.mixins import BaseViewSet
from api.serializers.job import DeleteJobDataSerializer
from config.job_manager import spiderdata_db_client
from core.models import SpiderJob
from core.tasks import get_chain_to_process_usage_data
Expand Down Expand Up @@ -99,7 +98,7 @@ def list(self, request, *args, **kwargs):
raise DataBaseError({"error": errors.UNABLE_CONNECT_DB})

job = SpiderJob.objects.filter(jid=kwargs["jid"]).get()
job_collection_name = self.get_collection_name(job, data_type, **kwargs)
job_collection_name = self.get_collection_name(job, data_type)

count = spiderdata_db_client.get_estimated_document_count(
kwargs["pid"], job_collection_name
Expand Down Expand Up @@ -148,18 +147,18 @@ def list(self, request, *args, **kwargs):
}
)

def get_collection_name(self, job, data_type, **kwargs):
def get_collection_name(self, job, data_type):
if (
job.cronjob is not None
and job.cronjob.unique_collection
and data_type == "items"
):
job_collection_name = "{}-scj{}-job_{}".format(
kwargs["sid"], job.cronjob.cjid, data_type
job.spider.sid, job.cronjob.cjid, data_type
)
else:
job_collection_name = "{}-{}-job_{}".format(
kwargs["sid"], kwargs["jid"], data_type
job.spider.sid, job.jid, data_type
)

return job_collection_name
Expand Down Expand Up @@ -197,7 +196,7 @@ def download(self, request, *args, **kwargs):
data_type = request.query_params.get("type", "items")

job = SpiderJob.objects.filter(jid=kwargs["jid"]).get()
job_collection_name = self.get_collection_name(job, data_type, **kwargs)
job_collection_name = self.get_collection_name(job, data_type)

data = []
if data_type == "stats":
Expand All @@ -220,7 +219,12 @@ def download(self, request, *args, **kwargs):

@swagger_auto_schema(
methods=["POST"],
responses={status.HTTP_200_OK: DeleteJobDataSerializer()},
responses={
status.HTTP_200_OK: openapi.Response(description="Deletion successful"),
status.HTTP_404_NOT_FOUND: openapi.Response(
description="Could not delete data"
),
},
manual_parameters=[
openapi.Parameter(
"type",
Expand All @@ -238,20 +242,16 @@ def delete(self, request, *args, **kwargs):
if not spiderdata_db_client.get_connection():
raise DataBaseError({"error": errors.UNABLE_CONNECT_DB})

job_collection_name = self.get_collection_name(
job, data_type, kwargs["sid"], kwargs["jid"]
)
count = spiderdata_db_client.delete_collection_data(
job_collection_name = self.get_collection_name(job, data_type)
deleted_data = spiderdata_db_client.delete_collection_data(
kwargs["pid"], job_collection_name
)
chain_of_usage_process = get_chain_to_process_usage_data(
after_delete=True, project_id=job.spider.project.pid, job_id=job.jid
)
chain_of_usage_process.apply_async()

return Response(
{
"count": count,
},
status=status.HTTP_200_OK,
)
if deleted_data:
return Response(status=status.HTTP_200_OK)
else:
return Response(status=status.HTTP_404_NOT_FOUND)
5 changes: 2 additions & 3 deletions estela-api/core/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,8 @@ def record_project_usage_after_data_delete(project_id, job_id):
@celery_app.task()
def delete_job_data(job_key):
jid, sid, pid = job_key.split(".")
delete_data(pid, sid, jid, "items")
delete_data(pid, sid, jid, "requests")
delete_data(pid, sid, jid, "logs")
for data_type in ["items", "requests", "logs"]:
delete_data(pid, sid, jid, data_type)
SpiderJob.objects.filter(jid=jid).update(data_status=DataStatus.DELETED_STATUS)
record_project_usage_after_data_delete(pid, int(jid))

Expand Down
19 changes: 7 additions & 12 deletions estela-api/docs/api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1205,9 +1205,9 @@ paths:
type: string
responses:
'200':
description: ''
schema:
$ref: '#/definitions/DeleteJobData'
description: Deletion successful
'404':
description: Could not delete data
tags:
- api
parameters:
Expand Down Expand Up @@ -2647,15 +2647,10 @@ definitions:
title: Data expiry days
description: Job data expiry days.
type: integer
DeleteJobData:
required:
- count
type: object
properties:
count:
title: Count
description: Deleted items count.
type: integer
proxy_usage_data:
title: Proxy usage data
description: Proxy Usage data.
type: string
UsageRecord:
required:
- processing_time
Expand Down
10 changes: 3 additions & 7 deletions estela-web/src/pages/JobDataListPage/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ import { RouteComponentProps } from "react-router-dom";

import "./styles.scss";
import { ApiService } from "../../services";
import {
ApiProjectsSpidersJobsDataListRequest,
ApiProjectsSpidersJobsDataDeleteRequest,
DeleteJobData,
} from "../../services/api";
import { ApiProjectsSpidersJobsDataListRequest, ApiProjectsSpidersJobsDataDeleteRequest } from "../../services/api";
import { resourceNotAllowedNotification, dataDeletedNotification, Spin } from "../../shared";

const { Content } = Layout;
Expand Down Expand Up @@ -59,9 +55,9 @@ export class JobDataListPage extends Component<RouteComponentProps<RouteParams>,
type: this.type,
};
this.apiService.apiProjectsSpidersJobsDataDelete(request).then(
(response: DeleteJobData) => {
() => {
this.setState({ data: [], count: 0, current: 0, loaded: true });
dataDeletedNotification(response.count);
dataDeletedNotification();
},
(error: unknown) => {
error;
Expand Down
6 changes: 3 additions & 3 deletions estela-web/src/pages/JobDataPage/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import {
ApiProjectsSpidersJobsDataListRequest,
ApiProjectsSpidersJobsDataDeleteRequest,
ApiProjectsSpidersJobsDataDownloadRequest,
DeleteJobData,
InlineResponse2006,
InlineResponse2008,
} from "../../services/api";
Expand Down Expand Up @@ -43,8 +42,8 @@ const deleteSpiderJobData = (type_: string, projectId: string, spiderId: string,
type: type_,
};
return apiService.apiProjectsSpidersJobsDataDelete(request).then(
(response: DeleteJobData) => {
dataDeletedNotification(response.count);
() => {
dataDeletedNotification();
return true;
},
(error: unknown) => {
Expand Down Expand Up @@ -656,6 +655,7 @@ export function JobLogsData({ projectId, spiderId, jobId }: JobsDataProps) {
const [count, setCount] = useState(0);
const [loaded, setLoaded] = useState(false);
const [logs, setLogs] = useState<Dictionary[]>([]);

useEffect(() => {
getData("logs", 1, projectId, spiderId, jobId).then((response) => {
let data: Dictionary[] = [];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ models/Activity.ts
models/AuthToken.ts
models/ChangePassword.ts
models/CoverageStats.ts
models/DeleteJobData.ts
models/Deploy.ts
models/DeployCreate.ts
models/DeployUpdate.ts
Expand Down
12 changes: 4 additions & 8 deletions estela-web/src/services/api/generated-api/apis/ApiApi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ import {
ChangePassword,
ChangePasswordFromJSON,
ChangePasswordToJSON,
DeleteJobData,
DeleteJobDataFromJSON,
DeleteJobDataToJSON,
Deploy,
DeployFromJSON,
DeployToJSON,
Expand Down Expand Up @@ -2071,7 +2068,7 @@ export class ApiApi extends runtime.BaseAPI {

/**
*/
async apiProjectsSpidersJobsDataDeleteRaw(requestParameters: ApiProjectsSpidersJobsDataDeleteRequest): Promise<runtime.ApiResponse<DeleteJobData>> {
async apiProjectsSpidersJobsDataDeleteRaw(requestParameters: ApiProjectsSpidersJobsDataDeleteRequest): Promise<runtime.ApiResponse<void>> {
if (requestParameters.jid === null || requestParameters.jid === undefined) {
throw new runtime.RequiredError('jid','Required parameter requestParameters.jid was null or undefined when calling apiProjectsSpidersJobsDataDelete.');
}
Expand Down Expand Up @@ -2106,14 +2103,13 @@ export class ApiApi extends runtime.BaseAPI {
query: queryParameters,
});

return new runtime.JSONApiResponse(response, (jsonValue) => DeleteJobDataFromJSON(jsonValue));
return new runtime.VoidApiResponse(response);
}

/**
*/
async apiProjectsSpidersJobsDataDelete(requestParameters: ApiProjectsSpidersJobsDataDeleteRequest): Promise<DeleteJobData> {
const response = await this.apiProjectsSpidersJobsDataDeleteRaw(requestParameters);
return await response.value();
async apiProjectsSpidersJobsDataDelete(requestParameters: ApiProjectsSpidersJobsDataDeleteRequest): Promise<void> {
await this.apiProjectsSpidersJobsDataDeleteRaw(requestParameters);
}

/**
Expand Down
57 changes: 0 additions & 57 deletions estela-web/src/services/api/generated-api/models/DeleteJobData.ts

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ export interface SpiderJobUpdate {
* @memberof SpiderJobUpdate
*/
dataExpiryDays?: number;
/**
* Proxy Usage data.
* @type {string}
* @memberof SpiderJobUpdate
*/
proxyUsageData?: string;
}

/**
Expand Down Expand Up @@ -108,6 +114,7 @@ export function SpiderJobUpdateFromJSONTyped(json: any, ignoreDiscriminator: boo
'requestCount': !exists(json, 'request_count') ? undefined : json['request_count'],
'dataStatus': !exists(json, 'data_status') ? undefined : json['data_status'],
'dataExpiryDays': !exists(json, 'data_expiry_days') ? undefined : json['data_expiry_days'],
'proxyUsageData': !exists(json, 'proxy_usage_data') ? undefined : json['proxy_usage_data'],
};
}

Expand All @@ -127,6 +134,7 @@ export function SpiderJobUpdateToJSON(value?: SpiderJobUpdate | null): any {
'request_count': value.requestCount,
'data_status': value.dataStatus,
'data_expiry_days': value.dataExpiryDays,
'proxy_usage_data': value.proxyUsageData,
};
}

Expand Down
1 change: 0 additions & 1 deletion estela-web/src/services/api/generated-api/models/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ export * from './Activity';
export * from './AuthToken';
export * from './ChangePassword';
export * from './CoverageStats';
export * from './DeleteJobData';
export * from './Deploy';
export * from './DeployCreate';
export * from './DeployUpdate';
Expand Down
4 changes: 2 additions & 2 deletions estela-web/src/shared/notifications/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ export const resourceNotAllowedNotification = (): void => {
history.push("/");
};

export const dataDeletedNotification = (n: number): void => {
export const dataDeletedNotification = (): void => {
notification.open({
message: "Data Successfully Deleted",
description: `${n} Items have been deleted`,
description: "All items have been deleted",
});
};

Expand Down

0 comments on commit 702bdd6

Please sign in to comment.