From 046b6d3a2ace914fdd07d3e6f6142d68532c2031 Mon Sep 17 00:00:00 2001 From: Bishwas Praveen Date: Fri, 29 Sep 2023 13:34:39 -0500 Subject: [PATCH 01/13] Adding new statuses options for the curation app --- .gitignore | 3 + local.yml | 1 - sde_collections/admin.py | 2 +- .../management/commands/push_to_github.py | 31 ++++++ .../0032_collection_workflow_status.py | 36 +++++++ sde_collections/models/collection.py | 25 +++++ .../models/collection_choice_fields.py | 25 +++++ sde_collections/serializers.py | 5 + sde_collections/tasks.py | 1 + sde_collections/utils/health_check.py | 99 +++++++++++-------- sde_collections/views.py | 28 +++--- .../static/css/collections_list.css | 4 + .../static/js/collection_list.js | 67 ++++++++++++- .../sde_collections/collection_list.html | 21 ++++ 14 files changed, 291 insertions(+), 57 deletions(-) create mode 100644 sde_collections/migrations/0032_collection_workflow_status.py diff --git a/.gitignore b/.gitignore index 4d0382ee..f601cebf 100644 --- a/.gitignore +++ b/.gitignore @@ -293,3 +293,6 @@ config_generation/config.py #model's inference files Document_Classifier_inference/model.pt + +#Database Backup +backup.json diff --git a/local.yml b/local.yml index cdd86a30..2ffaeb66 100644 --- a/local.yml +++ b/local.yml @@ -19,7 +19,6 @@ services: env_file: - ./.envs/.local/.django - ./.envs/.local/.postgres - - ./.env ports: - "8000:8000" command: /start diff --git a/sde_collections/admin.py b/sde_collections/admin.py index 0aca0453..57dd7996 100644 --- a/sde_collections/admin.py +++ b/sde_collections/admin.py @@ -151,7 +151,7 @@ class CollectionAdmin(admin.ModelAdmin, ExportCsvMixin, UpdateConfigMixin): "division", "new_collection", ) - list_filter = ("division", "curation_status", "turned_on") + list_filter = ("division", "curation_status", "workflow_status", "turned_on") search_fields = ("name", "url") actions = [ "export_as_csv", diff --git a/sde_collections/management/commands/push_to_github.py b/sde_collections/management/commands/push_to_github.py index 37aed37f..e4f18772 100644 --- a/sde_collections/management/commands/push_to_github.py +++ b/sde_collections/management/commands/push_to_github.py @@ -20,14 +20,29 @@ def handle(self, *args, **options): config_folder__in=config_folders ).filter(curation_status=5) + # workflow status 8 is Curated + collections2 = Collection.objects.filter( + config_folder__in=config_folders + ).filter(workflow_status=8) + cant_push = Collection.objects.filter(config_folder__in=config_folders).exclude( curation_status=5 ) cant_push = list(cant_push.values_list("name", flat=True)) + # filer collections that can't be pushed based on workflow status + cant_push2 = Collection.objects.filter( + config_folder__in=config_folders + ).exclude(workflow_status=8) + cant_push2 = list(cant_push2.values_list("name", flat=True)) + gh = GitHubHandler(collections) gh.push_to_github() + # github handler takes in collections2 based on workflow status + gh2 = GitHubHandler(collections2) + gh2.push_to_github() + self.stdout.write( self.style.SUCCESS( "Successfully pushed: %s" @@ -42,3 +57,19 @@ def handle(self, *args, **options): % cant_push ) ) + + # workflow status based code addition + self.stdout.write( + self.style.SUCCESS( + "Successfully pushed: %s" + % list(collections2.values_list("name", flat=True)) + ) + ) + + if cant_push2: + self.stdout.write( + self.style.ERROR( + "Can't push since status is not Curated (choice_id:8) %s" + % cant_push2 + ) + ) diff --git a/sde_collections/migrations/0032_collection_workflow_status.py b/sde_collections/migrations/0032_collection_workflow_status.py new file mode 100644 index 00000000..6a10edcd --- /dev/null +++ b/sde_collections/migrations/0032_collection_workflow_status.py @@ -0,0 +1,36 @@ +# Generated by Django 4.2.3 on 2023-09-28 19:34 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("sde_collections", "0031_candidateurl_is_pdf"), + ] + + operations = [ + migrations.AddField( + model_name="collection", + name="workflow_status", + field=models.IntegerField( + choices=[ + (1, "Collection Created"), + (2, "Unengineered"), + (3, "Engineering In Progress"), + (4, "Engineering Completed But No URL's Yet"), + (5, "URL'S Generated"), + (6, "Ready For Curation"), + (7, "Being Curated"), + (8, "Curated"), + (9, "Final Code Reviewed"), + (10, "Deployed To Test"), + (11, "Completed Running On Test Server"), + (12, "Quality Check In Progress"), + (13, "Quality Check Completed"), + (14, "Deployed To Production"), + (15, "Finished Running on Production"), + ], + default=1, + ), + ), + ] diff --git a/sde_collections/models/collection.py b/sde_collections/models/collection.py index bfe4f8cd..5d725e49 100644 --- a/sde_collections/models/collection.py +++ b/sde_collections/models/collection.py @@ -12,6 +12,7 @@ DocumentTypes, SourceChoices, UpdateFrequencies, + WorkflowStatusChoices, ) User = get_user_model() @@ -71,6 +72,9 @@ class Collection(models.Model): curation_status = models.IntegerField( choices=CurationStatusChoices.choices, default=1 ) + workflow_status = models.IntegerField( + choices=WorkflowStatusChoices.choices, default=1 + ) curated_by = models.ForeignKey( User, on_delete=models.DO_NOTHING, null=True, blank=True ) @@ -97,6 +101,27 @@ def curation_status_button_color(self) -> str: } return color_choices[self.curation_status] + @property + def workflow_status_button_color(self) -> str: + color_choices = { + 1: "btn-light", + 2: "btn-danger", + 3: "btn-warning", + 4: "btn-info", + 5: "btn-success", + 6: "btn-primary", + 7: "btn-info", + 8: "btn-secondary", + 9: "btn-light", + 10: "btn-danger", + 11: "btn-warning", + 12: "btn-info", + 13: "btn-secondary", + 14: "btn-primary", + 15: "btn-success", + } + return color_choices[self.workflow_status] + def _process_exclude_list(self): """Process the exclude list.""" return [ diff --git a/sde_collections/models/collection_choice_fields.py b/sde_collections/models/collection_choice_fields.py index 64a1d3b9..b69251ad 100644 --- a/sde_collections/models/collection_choice_fields.py +++ b/sde_collections/models/collection_choice_fields.py @@ -61,3 +61,28 @@ def get_status_string(cls, value): if choice[0] == value: return choice[1] return "N/A" + + +class WorkflowStatusChoices(models.IntegerChoices): + COLLECTION_CREATED = 1, "Collection Created" + UNENGINEERED = 2, "Unengineered" + ENGINEERING_IN_PROGRESS = 3, "Engineering In Progress" + ENGINEERING_DONE_NO_URLS = 4, "Engineering Completed But No URL's Yet" + URLS_DONE = 5, "URL'S Generated" + READY_FOR_CURATION = 6, "Ready For Curation" + BEING_CURATED = 7, "Being Curated" + CURATED = 8, "Curated" + FINAL_CODE_REVIEWED = 9, "Final Code Reviewed" + DEPLOYED_TO_TEST = 10, "Deployed To Test" + RUNNING_ON_TEST_COMPLETED = 11, "Completed Running On Test Server" + QUALITY_CHECK_IN_PROGRESS = 12, "Quality Check In Progress" + QUALITY_CHECK_FINISHED = 13, "Quality Check Completed" + DEPLOYED_TO_PROD = 14, "Deployed To Production" + FINISHED_RUNNING_ON_PROD = 15, "Finished Running on Production" + + @classmethod + def get_status_string(cls, value): + for choice in cls.choices: + if choice[0] == value: + return choice[1] + return "N/A" diff --git a/sde_collections/serializers.py b/sde_collections/serializers.py index 2a1d4718..745fcc52 100644 --- a/sde_collections/serializers.py +++ b/sde_collections/serializers.py @@ -10,13 +10,18 @@ class CollectionSerializer(serializers.ModelSerializer): curation_status_display = serializers.CharField( source="get_curation_status_display", read_only=True ) + workflow_status_display = serializers.CharField( + source="get_workflow_status_display", read_only=True + ) class Meta: model = Collection fields = ( "id", "curation_status", + "workflow_status", "curation_status_display", + "workflow_status_display", "curated_by", ) # extra_kwargs = { diff --git a/sde_collections/tasks.py b/sde_collections/tasks.py index 6d86dc6c..657b9467 100644 --- a/sde_collections/tasks.py +++ b/sde_collections/tasks.py @@ -86,6 +86,7 @@ def import_candidate_urls_task(collection_ids=[], config_folder_names=[]): subprocess.run(f'python manage.py loaddata "{urls_file}"', shell=True) collection.apply_all_patterns() collection.curation_status = 2 # ready to curate + collection.workflow_status = 6 # ready to curate collection.save() shutil.rmtree(TEMP_FOLDER_NAME) diff --git a/sde_collections/utils/health_check.py b/sde_collections/utils/health_check.py index 0a3b5e7f..d059df7d 100644 --- a/sde_collections/utils/health_check.py +++ b/sde_collections/utils/health_check.py @@ -1,21 +1,20 @@ -from typing import Dict +import re -from sde_collections.models.pattern import ( - TitlePattern, - ExcludePattern) -from sde_collections.models.collection import CurationStatusChoices from sde_collections.models.candidate_url import CandidateURL +from sde_collections.models.collection import ( + CurationStatusChoices, + WorkflowStatusChoices, +) +from sde_collections.models.pattern import ExcludePattern, TitlePattern from sde_collections.tasks import _get_data_to_import -import re - -def health_check(collection, server_name: str = "production") -> Dict: +def health_check(collection, server_name: str = "production") -> dict: """ - This method checks whether the rules defined in webapp are properly - synced with Sinequa or not. + This method checks whether the rules defined in webapp are properly + synced with Sinequa or not. - Checks for Title Patterns, Exclude Patterns and Document Type Patterns. + Checks for Title Patterns, Exclude Patterns and Document Type Patterns. """ health_check_report = [] @@ -23,13 +22,15 @@ def health_check(collection, server_name: str = "production") -> Dict: candidate_urls_sinequa = _fetch_candidate_urls(collection, server_name) # check for title patterns - title_pattern_report = _health_check_title_pattern(collection, - candidate_urls_sinequa) + title_pattern_report = _health_check_title_pattern( + collection, candidate_urls_sinequa + ) health_check_report.extend(title_pattern_report) # check for exclude patterns - exclude_pattern_report = _health_check_exclude_pattern(collection, - candidate_urls_sinequa) + exclude_pattern_report = _health_check_exclude_pattern( + collection, candidate_urls_sinequa + ) health_check_report.extend(exclude_pattern_report) return health_check_report @@ -43,8 +44,7 @@ def _fetch_candidate_urls(collection, server_name): for candidate_url in candidate_urls_remote: url = candidate_url["fields"]["url"] candidate_urls_sinequa[url] = CandidateURL( - url=url, - scraped_title=candidate_url["fields"]["scraped_title"] + url=url, scraped_title=candidate_url["fields"]["scraped_title"] ) return candidate_urls_sinequa @@ -54,12 +54,14 @@ def _health_check_title_pattern(collection, candidate_urls_sinequa): collection_name = collection.name collection_config_folder = collection.config_folder curation_status = collection.curation_status + workflow_status = collection.workflow_status title_pattern_report = [] # now get Title Patterns in indexer db title_patterns_local = TitlePattern.objects.all().filter( - collection_id=collection_id) + collection_id=collection_id + ) # check if title patterns are porperly reflected in sinequa's response for title_pattern in title_patterns_local: @@ -78,7 +80,12 @@ def _health_check_title_pattern(collection, candidate_urls_sinequa): "id": collection_id, "collection_name": collection_name, "config_folder": collection_config_folder, - "curation_status": CurationStatusChoices.get_status_string(curation_status), + "curation_status": CurationStatusChoices.get_status_string( + curation_status + ), + "workflow_status": WorkflowStatusChoices.get_status_string( + workflow_status + ), "pattern_name": "Title Pattern", "pattern": pattern, "scraped_title": matched_title, @@ -94,12 +101,14 @@ def _health_check_exclude_pattern(collection, candidate_urls_sinequa): collection_name = collection.name collection_config_folder = collection.config_folder curation_status = collection.curation_status + workflow_status = collection.workflow_status exclude_pattern_report = [] # Perform exclude pattern check here exclude_patterns_local = ExcludePattern.objects.all().filter( - collection_id=collection_id) + collection_id=collection_id + ) def create_exclude_pattern_report(match_pattern, url): return { @@ -107,6 +116,7 @@ def create_exclude_pattern_report(match_pattern, url): "collection_name": collection_name, "config_folder": collection_config_folder, "curation_status": CurationStatusChoices.get_status_string(curation_status), + "workflow_status": WorkflowStatusChoices.get_status_string(workflow_status), "pattern_name": "Exclude Pattern", "pattern": match_pattern, "non_compliant_url": url, @@ -117,44 +127,49 @@ def create_exclude_pattern_report(match_pattern, url): # check with http:// if match_pattern.find("http://") == -1: - url = "http://{}".format(match_pattern) + url = f"http://{match_pattern}" if url in candidate_urls_sinequa: - exclude_pattern_report.append(create_exclude_pattern_report(match_pattern, - url)) + exclude_pattern_report.append( + create_exclude_pattern_report(match_pattern, url) + ) if match_pattern.find("https://") == -1: - url = "https://{}".format(match_pattern) + url = f"https://{match_pattern}" if url in candidate_urls_sinequa: - exclude_pattern_report.append(create_exclude_pattern_report(match_pattern, - url)) + exclude_pattern_report.append( + create_exclude_pattern_report(match_pattern, url) + ) else: url = match_pattern # assuming it has either https or http if url in candidate_urls_sinequa: - exclude_pattern_report.append(create_exclude_pattern_report(match_pattern, - url)) + exclude_pattern_report.append( + create_exclude_pattern_report(match_pattern, url) + ) return exclude_pattern_report def _resolve_title_pattern(pattern, title): """ - Given a pattern check whether it is able to capture the title or not. + Given a pattern check whether it is able to capture the title or not. - E.g.: GCN {title} - should capture : - -> GCN - Notices - -> GCN - News + E.g.: GCN {title} + should capture : + -> GCN - Notices + -> GCN - News """ - pattern_with_whitespace = pattern.replace(" ", "\s*-?\s*") + pattern_with_whitespace = pattern.replace(" ", r"\s*-?\s*") - parentheis_pattern = r'\{[^\}]+\}' - multi_pattern = r'\/\/\*([^\/]*)\/a' + parentheis_pattern = r"\{[^\}]+\}" + multi_pattern = r"\/\/\*([^\/]*)\/a" def replace_parentheis_with_anything(match): - return r'\S+' - - regex_pattern_parenthesis = re.sub(parentheis_pattern, replace_parentheis_with_anything, - pattern_with_whitespace) - regex_pattern = re.sub(multi_pattern, replace_parentheis_with_anything, - regex_pattern_parenthesis) + return r"\S+" + + regex_pattern_parenthesis = re.sub( + parentheis_pattern, replace_parentheis_with_anything, pattern_with_whitespace + ) + regex_pattern = re.sub( + multi_pattern, replace_parentheis_with_anything, regex_pattern_parenthesis + ) return re.match(regex_pattern, title) diff --git a/sde_collections/views.py b/sde_collections/views.py index 272a4b74..eef7cd9f 100644 --- a/sde_collections/views.py +++ b/sde_collections/views.py @@ -1,5 +1,6 @@ -import re import csv +import re +from io import StringIO from django.contrib.auth import get_user_model from django.contrib.auth.mixins import LoginRequiredMixin @@ -12,7 +13,6 @@ from django.views.generic.detail import DetailView from django.views.generic.edit import DeleteView from django.views.generic.list import ListView -from django.http import HttpResponse from rest_framework import generics, status, viewsets from rest_framework.response import Response from rest_framework.views import APIView @@ -22,7 +22,10 @@ from .forms import CollectionGithubIssueForm, RequiredUrlForm from .models.candidate_url import CandidateURL from .models.collection import Collection, RequiredUrls -from .models.collection_choice_fields import CurationStatusChoices +from .models.collection_choice_fields import ( + CurationStatusChoices, + WorkflowStatusChoices, +) from .models.pattern import DocumentTypePattern, ExcludePattern, TitlePattern from .serializers import ( CandidateURLBulkCreateSerializer, @@ -34,8 +37,6 @@ ) from .tasks import push_to_github_task from .utils.health_check import health_check -from io import StringIO - User = get_user_model() @@ -98,6 +99,7 @@ def get_context_data(self, **kwargs): context["segment"] = "collections" context["curators"] = User.objects.filter(groups__name="Curators") context["curation_status_choices"] = CurationStatusChoices + context["workflow_status_choices"] = WorkflowStatusChoices return context @@ -138,6 +140,7 @@ def post(self, request, *args, **kwargs): if "claim_button" in request.POST: user = self.request.user collection.curation_status = CurationStatusChoices.BEING_CURATED + collection.workflow_status = WorkflowStatusChoices.BEING_CURATED collection.curated_by = user collection.curation_started = timezone.now() collection.save() @@ -353,23 +356,24 @@ def post(self, request): class HealthCheckView(View): - '''' - This view checks whether the rules in indexer db has been correctly reflected - in our prod/test sinequa instances or not and at the end generates a report. - ''' + """' + This view checks whether the rules in indexer db has been correctly reflected + in our prod/test sinequa instances or not and at the end generates a report. + """ def get(self, *args, **kwargs): - collection = Collection.objects.get(pk=kwargs.get('pk')) + collection = Collection.objects.get(pk=kwargs.get("pk")) sync_check_report = health_check(collection, server_name="production") field_names = [ "id", "collection_name", "config_folder", "curation_status", + "workflow_status", "pattern_name", "pattern", "scraped_title", - "non_compliant_url" + "non_compliant_url", ] # download the report in CSV format @@ -380,7 +384,7 @@ def get(self, *args, **kwargs): writer.writerow(item) http_response = HttpResponse(content_type="text/csv") - http_response['Content-Disposition'] = 'attachment; filename="report.csv"' + http_response["Content-Disposition"] = 'attachment; filename="report.csv"' http_response.write(csv_data.getvalue()) return http_response diff --git a/sde_indexing_helper/static/css/collections_list.css b/sde_indexing_helper/static/css/collections_list.css index 602bce2a..521cb89c 100644 --- a/sde_indexing_helper/static/css/collections_list.css +++ b/sde_indexing_helper/static/css/collections_list.css @@ -12,6 +12,10 @@ body { cursor: pointer !important; } +.workflow_status_select { + cursor: pointer !important; +} + .curator_select { cursor: pointer !important; } diff --git a/sde_indexing_helper/static/js/collection_list.js b/sde_indexing_helper/static/js/collection_list.js index 9f781834..65de853e 100644 --- a/sde_indexing_helper/static/js/collection_list.js +++ b/sde_indexing_helper/static/js/collection_list.js @@ -25,7 +25,8 @@ let table = $('#collection_table').DataTable({ var data = this.data(); var collection_name = $(data[1]).text().slice(0, -14); // remove " chevron_right" from end of string var collection_id = $(data[1]).attr('href').slice(1, -1); // we get /932/ from href="/932/" - var curation_status = $(data[6]).find('button').text(); // we get /932/ from href="/932/" + var curation_status = $(data[6]).find('button').text(); + var workflow_status = $(data[7]).find('button').text(); if (curation_status != "Curated") { toastr.error(`Can't push ${collection_name} because its status is not "Curated".`); @@ -33,6 +34,12 @@ let table = $('#collection_table').DataTable({ collection_ids.push(collection_id); toastr.success(`Started pushing ${collection_name} to GitHub...`); } + if (workflow_status != "Curated") { + toastr.error(`Can't push ${collection_name} because its status is not "Curated".`); + } else { + collection_ids.push(collection_id); + toastr.success(`Started pushing ${collection_name} to GitHub...`); + } }); $.ajax({ url: '/api/collections/push_to_github/', @@ -146,6 +153,45 @@ function handleCurationStatusSelect() { }); } +function handleWorkflowStatusSelect() { + $("body").on("click", ".workflow_status_select", function () { + var collection_id = $(this).data('collection-id'); + var workflow_status = $(this).attr('value'); + var workflow_status_text = $(this).text(); + var color_choices = { + 1: "btn-light", + 2: "btn-danger", + 3: "btn-warning", + 4: "btn-info", + 5: "btn-success", + 6: "btn-primary", + 7: "btn-info", + 8: "btn-secondary", + 9: "btn-light", + 10: "btn-danger", + 11: "btn-warning", + 12: "btn-info", + 13: "btn-secondary", + 14: "btn-primary", + 15: "btn-success", + } + + $possible_buttons = $('body').find(`[id="workflow-status-button-${collection_id}"]`); + if ($possible_buttons.length > 1) { + $button = $possible_buttons[1]; + $button = $($button); + } else { + $button = $(`#workflow-status-button-${collection_id}`); + } + $button.text(workflow_status_text); + $button.removeClass('btn-light btn-danger btn-warning btn-info btn-success btn-primary btn-secondary'); + $button.addClass(color_choices[parseInt(workflow_status)]); + $('#collection_table').DataTable().searchPanes.rebuildPane(6); + + postWorkflowStatus(collection_id, workflow_status); + }); +} + function handleCuratorSelect() { $("body").on("click", ".curator_select", function () { var collection_id = $(this).data('collection-id'); @@ -178,6 +224,24 @@ function postCurationStatus(collection_id, curation_status) { }); } +function postWorkflowStatus(collection_id, workflow_status) { + var url = `/api/collections/${collection_id}/`; + $.ajax({ + url: url, + type: "PUT", + data: { + workflow_status: workflow_status, + csrfmiddlewaretoken: csrftoken + }, + headers: { + 'X-CSRFToken': csrftoken + }, + success: function (data) { + toastr.success('Workflow Status Updated!'); + }, + }); +} + function postCurator(collection_id, curator_id) { var url = `/api/collections/${collection_id}/`; $.ajax({ @@ -202,5 +266,6 @@ $(document).ready(function () { function setupClickHandlers() { handleCurationStatusSelect(); + handleWorkflowStatusSelect(); handleCuratorSelect(); } diff --git a/sde_indexing_helper/templates/sde_collections/collection_list.html b/sde_indexing_helper/templates/sde_collections/collection_list.html index 6c5e22ee..74036b77 100644 --- a/sde_indexing_helper/templates/sde_collections/collection_list.html +++ b/sde_indexing_helper/templates/sde_collections/collection_list.html @@ -22,6 +22,7 @@

SDE Collections

Candidate URLs New? Status + Workflow Status Curator Has Config? Connector Type @@ -73,6 +74,26 @@

SDE Collections

+ + + + + +