Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

(enh)estela-api: Improve data deletion and add a small fix #238

Merged
Merged 4 commits on Jan 9, 2024.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions database_adapters/db_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pymongo
from bson.objectid import ObjectId
from pymongo.errors import ConnectionFailure
from pymongo.errors import ConnectionFailure, PyMongoError


class InsertionResponse:
Expand Down Expand Up @@ -87,12 +87,12 @@ def get_connection(self):

def delete_collection_data(self, database_name, collection_name):
collection = self.client[database_name][collection_name]
count = collection.delete_many({}).deleted_count
try:
collection.drop()
return True
except PyMongoError as ex:
print(ex)
return count
return False

def get_collection_data(self, database_name, collection_name, limit=10000):
collection = self.client[database_name][collection_name]
Expand Down
4 changes: 0 additions & 4 deletions estela-api/api/serializers/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,6 @@ def update(self, instance, validated_data):
return instance


class DeleteJobDataSerializer(serializers.Serializer):
count = serializers.IntegerField(required=True, help_text="Deleted items count.")


class ProjectJobSerializer(serializers.Serializer):
results = SpiderJobSerializer(many=True, required=True, help_text="Project jobs.")
count = serializers.IntegerField(required=True, help_text="Project jobs count.")
34 changes: 17 additions & 17 deletions estela-api/api/views/job_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from api import errors
from api.exceptions import DataBaseError
from api.mixins import BaseViewSet
from api.serializers.job import DeleteJobDataSerializer
from config.job_manager import spiderdata_db_client
from core.models import SpiderJob
from core.tasks import get_chain_to_process_usage_data
Expand Down Expand Up @@ -99,7 +98,7 @@ def list(self, request, *args, **kwargs):
raise DataBaseError({"error": errors.UNABLE_CONNECT_DB})

job = SpiderJob.objects.filter(jid=kwargs["jid"]).get()
job_collection_name = self.get_collection_name(job, data_type, **kwargs)
job_collection_name = self.get_collection_name(job, data_type)

count = spiderdata_db_client.get_estimated_document_count(
kwargs["pid"], job_collection_name
Expand Down Expand Up @@ -148,18 +147,18 @@ def list(self, request, *args, **kwargs):
}
)

def get_collection_name(self, job, data_type, **kwargs):
def get_collection_name(self, job, data_type):
if (
job.cronjob is not None
and job.cronjob.unique_collection
and data_type == "items"
):
job_collection_name = "{}-scj{}-job_{}".format(
kwargs["sid"], job.cronjob.cjid, data_type
job.spider.sid, job.cronjob.cjid, data_type
)
else:
job_collection_name = "{}-{}-job_{}".format(
kwargs["sid"], kwargs["jid"], data_type
job.spider.sid, job.jid, data_type
)

return job_collection_name
Expand Down Expand Up @@ -197,7 +196,7 @@ def download(self, request, *args, **kwargs):
data_type = request.query_params.get("type", "items")

job = SpiderJob.objects.filter(jid=kwargs["jid"]).get()
job_collection_name = self.get_collection_name(job, data_type, **kwargs)
job_collection_name = self.get_collection_name(job, data_type)

data = []
if data_type == "stats":
Expand All @@ -220,7 +219,12 @@ def download(self, request, *args, **kwargs):

@swagger_auto_schema(
methods=["POST"],
responses={status.HTTP_200_OK: DeleteJobDataSerializer()},
responses={
status.HTTP_200_OK: openapi.Response(description="Deletion successful"),
status.HTTP_404_NOT_FOUND: openapi.Response(
description="Could not delete data"
),
},
manual_parameters=[
openapi.Parameter(
"type",
Expand All @@ -238,20 +242,16 @@ def delete(self, request, *args, **kwargs):
if not spiderdata_db_client.get_connection():
raise DataBaseError({"error": errors.UNABLE_CONNECT_DB})

job_collection_name = self.get_collection_name(
job, data_type, kwargs["sid"], kwargs["jid"]
)
count = spiderdata_db_client.delete_collection_data(
job_collection_name = self.get_collection_name(job, data_type)
deleted_data = spiderdata_db_client.delete_collection_data(
kwargs["pid"], job_collection_name
)
chain_of_usage_process = get_chain_to_process_usage_data(
after_delete=True, project_id=job.spider.project.pid, job_id=job.jid
)
chain_of_usage_process.apply_async()

return Response(
{
"count": count,
},
status=status.HTTP_200_OK,
)
if deleted_data:
return Response(status=status.HTTP_200_OK)
else:
return Response(status=status.HTTP_404_NOT_FOUND)
5 changes: 2 additions & 3 deletions estela-api/core/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,8 @@ def record_project_usage_after_data_delete(project_id, job_id):
@celery_app.task()
def delete_job_data(job_key):
jid, sid, pid = job_key.split(".")
delete_data(pid, sid, jid, "items")
delete_data(pid, sid, jid, "requests")
delete_data(pid, sid, jid, "logs")
for data_type in ["items", "requests", "logs"]:
delete_data(pid, sid, jid, data_type)
SpiderJob.objects.filter(jid=jid).update(data_status=DataStatus.DELETED_STATUS)
record_project_usage_after_data_delete(pid, int(jid))

Expand Down
19 changes: 7 additions & 12 deletions estela-api/docs/api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1205,9 +1205,9 @@ paths:
type: string
responses:
'200':
description: ''
schema:
$ref: '#/definitions/DeleteJobData'
description: Deletion successful
'404':
description: Could not delete data
tags:
- api
parameters:
Expand Down Expand Up @@ -2647,15 +2647,10 @@ definitions:
title: Data expiry days
description: Job data expiry days.
type: integer
DeleteJobData:
required:
- count
type: object
properties:
count:
title: Count
description: Deleted items count.
type: integer
proxy_usage_data:
title: Proxy usage data
description: Proxy Usage data.
type: string
UsageRecord:
required:
- processing_time
Expand Down
10 changes: 3 additions & 7 deletions estela-web/src/pages/JobDataListPage/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ import { RouteComponentProps } from "react-router-dom";

import "./styles.scss";
import { ApiService } from "../../services";
import {
ApiProjectsSpidersJobsDataListRequest,
ApiProjectsSpidersJobsDataDeleteRequest,
DeleteJobData,
} from "../../services/api";
import { ApiProjectsSpidersJobsDataListRequest, ApiProjectsSpidersJobsDataDeleteRequest } from "../../services/api";
import { resourceNotAllowedNotification, dataDeletedNotification, Spin } from "../../shared";

const { Content } = Layout;
Expand Down Expand Up @@ -59,9 +55,9 @@ export class JobDataListPage extends Component<RouteComponentProps<RouteParams>,
type: this.type,
};
this.apiService.apiProjectsSpidersJobsDataDelete(request).then(
(response: DeleteJobData) => {
() => {
this.setState({ data: [], count: 0, current: 0, loaded: true });
dataDeletedNotification(response.count);
dataDeletedNotification();
},
(error: unknown) => {
error;
Expand Down
6 changes: 3 additions & 3 deletions estela-web/src/pages/JobDataPage/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import {
ApiProjectsSpidersJobsDataListRequest,
ApiProjectsSpidersJobsDataDeleteRequest,
ApiProjectsSpidersJobsDataDownloadRequest,
DeleteJobData,
InlineResponse2006,
InlineResponse2008,
} from "../../services/api";
Expand Down Expand Up @@ -43,8 +42,8 @@ const deleteSpiderJobData = (type_: string, projectId: string, spiderId: string,
type: type_,
};
return apiService.apiProjectsSpidersJobsDataDelete(request).then(
(response: DeleteJobData) => {
dataDeletedNotification(response.count);
() => {
dataDeletedNotification();
return true;
},
(error: unknown) => {
Expand Down Expand Up @@ -656,6 +655,7 @@ export function JobLogsData({ projectId, spiderId, jobId }: JobsDataProps) {
const [count, setCount] = useState(0);
const [loaded, setLoaded] = useState(false);
const [logs, setLogs] = useState<Dictionary[]>([]);

useEffect(() => {
getData("logs", 1, projectId, spiderId, jobId).then((response) => {
let data: Dictionary[] = [];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ models/Activity.ts
models/AuthToken.ts
models/ChangePassword.ts
models/CoverageStats.ts
models/DeleteJobData.ts
models/Deploy.ts
models/DeployCreate.ts
models/DeployUpdate.ts
Expand Down
12 changes: 4 additions & 8 deletions estela-web/src/services/api/generated-api/apis/ApiApi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ import {
ChangePassword,
ChangePasswordFromJSON,
ChangePasswordToJSON,
DeleteJobData,
DeleteJobDataFromJSON,
DeleteJobDataToJSON,
Deploy,
DeployFromJSON,
DeployToJSON,
Expand Down Expand Up @@ -2071,7 +2068,7 @@ export class ApiApi extends runtime.BaseAPI {

/**
*/
async apiProjectsSpidersJobsDataDeleteRaw(requestParameters: ApiProjectsSpidersJobsDataDeleteRequest): Promise<runtime.ApiResponse<DeleteJobData>> {
async apiProjectsSpidersJobsDataDeleteRaw(requestParameters: ApiProjectsSpidersJobsDataDeleteRequest): Promise<runtime.ApiResponse<void>> {
if (requestParameters.jid === null || requestParameters.jid === undefined) {
throw new runtime.RequiredError('jid','Required parameter requestParameters.jid was null or undefined when calling apiProjectsSpidersJobsDataDelete.');
}
Expand Down Expand Up @@ -2106,14 +2103,13 @@ export class ApiApi extends runtime.BaseAPI {
query: queryParameters,
});

return new runtime.JSONApiResponse(response, (jsonValue) => DeleteJobDataFromJSON(jsonValue));
return new runtime.VoidApiResponse(response);
}

/**
*/
async apiProjectsSpidersJobsDataDelete(requestParameters: ApiProjectsSpidersJobsDataDeleteRequest): Promise<DeleteJobData> {
const response = await this.apiProjectsSpidersJobsDataDeleteRaw(requestParameters);
return await response.value();
async apiProjectsSpidersJobsDataDelete(requestParameters: ApiProjectsSpidersJobsDataDeleteRequest): Promise<void> {
await this.apiProjectsSpidersJobsDataDeleteRaw(requestParameters);
}

/**
Expand Down
57 changes: 0 additions & 57 deletions estela-web/src/services/api/generated-api/models/DeleteJobData.ts

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ export interface SpiderJobUpdate {
* @memberof SpiderJobUpdate
*/
dataExpiryDays?: number;
/**
* Proxy Usage data.
* @type {string}
* @memberof SpiderJobUpdate
*/
proxyUsageData?: string;
}

/**
Expand Down Expand Up @@ -108,6 +114,7 @@ export function SpiderJobUpdateFromJSONTyped(json: any, ignoreDiscriminator: boo
'requestCount': !exists(json, 'request_count') ? undefined : json['request_count'],
'dataStatus': !exists(json, 'data_status') ? undefined : json['data_status'],
'dataExpiryDays': !exists(json, 'data_expiry_days') ? undefined : json['data_expiry_days'],
'proxyUsageData': !exists(json, 'proxy_usage_data') ? undefined : json['proxy_usage_data'],
};
}

Expand All @@ -127,6 +134,7 @@ export function SpiderJobUpdateToJSON(value?: SpiderJobUpdate | null): any {
'request_count': value.requestCount,
'data_status': value.dataStatus,
'data_expiry_days': value.dataExpiryDays,
'proxy_usage_data': value.proxyUsageData,
};
}

Expand Down
1 change: 0 additions & 1 deletion estela-web/src/services/api/generated-api/models/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ export * from './Activity';
export * from './AuthToken';
export * from './ChangePassword';
export * from './CoverageStats';
export * from './DeleteJobData';
export * from './Deploy';
export * from './DeployCreate';
export * from './DeployUpdate';
Expand Down
4 changes: 2 additions & 2 deletions estela-web/src/shared/notifications/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ export const resourceNotAllowedNotification = (): void => {
history.push("/");
};

export const dataDeletedNotification = (n: number): void => {
export const dataDeletedNotification = (): void => {
notification.open({
message: "Data Successfully Deleted",
description: `${n} Items have been deleted`,
description: "All items have been deleted",
});
};

Expand Down
Loading