diff --git a/sde_collections/models/collection.py b/sde_collections/models/collection.py index 31306b8c..5df2c931 100644 --- a/sde_collections/models/collection.py +++ b/sde_collections/models/collection.py @@ -460,14 +460,9 @@ def sync_with_production_webapp(self) -> None: def apply_all_patterns(self) -> None: """Apply all the patterns.""" - for pattern in self.excludepattern.all(): - pattern.apply() - for pattern in self.includepattern.all(): - pattern.apply() - for pattern in self.titlepattern.all(): - pattern.apply() - for pattern in self.documenttypepattern.all(): - pattern.apply() + for pattern_type in [self.excludepattern, self.includepattern, self.titlepattern, self.documenttypepattern]: + for pattern in pattern_type.all(): + pattern.apply() def save(self, *args, **kwargs): # Call the function to generate the value for the generated_field based on the original_field diff --git a/sde_collections/models/collection_choice_fields.py b/sde_collections/models/collection_choice_fields.py index 3a9a3664..8a7b42e9 100644 --- a/sde_collections/models/collection_choice_fields.py +++ b/sde_collections/models/collection_choice_fields.py @@ -1,22 +1,25 @@ from django.db import models -class Divisions(models.IntegerChoices): - ASTROPHYSICS = 1, "Astrophysics" - BIOLOGY = 2, "Biological and Physical Sciences" - EARTH_SCIENCE = 3, "Earth Science" - HELIOPHYSICS = 4, "Heliophysics" - PLANETARY = 5, "Planetary Science" - GENERAL = 6, "General" - +class ChoiceFieldMixin: @classmethod def lookup_by_text(cls, text: str) -> int | None: + """Common lookup functionality for all choice fields""" for choice in cls.choices: if choice[1].lower() == text.lower(): return choice[0] return None +class Divisions(ChoiceFieldMixin, models.IntegerChoices): + ASTROPHYSICS = 1, "Astrophysics" + BIOLOGY = 2, "Biological and Physical Sciences" + EARTH_SCIENCE = 3, "Earth Science" + HELIOPHYSICS = 4, "Heliophysics" + PLANETARY = 5, "Planetary Science" + GENERAL = 6, "General" + + class UpdateFrequencies(models.IntegerChoices): DAILY = 1, "Daily" WEEKLY = 2, "Weekly" @@ -24,20 +27,13 @@ class UpdateFrequencies(models.IntegerChoices): MONTHLY = 4, "Monthly" -class DocumentTypes(models.IntegerChoices): +class DocumentTypes(ChoiceFieldMixin, models.IntegerChoices): IMAGES = 1, "Images" DATA = 2, "Data" DOCUMENTATION = 3, "Documentation" SOFTWARETOOLS = 4, "Software and Tools" MISSIONSINSTRUMENTS = 5, "Missions and Instruments" - @classmethod - def lookup_by_text(cls, text: str) -> int | None: - for choice in cls.choices: - if choice[1].lower() == text.lower(): - return choice[0] - return None - class SourceChoices(models.IntegerChoices): ONLY_IN_ORIGINAL = 1, "Only in original" @@ -45,19 +41,12 @@ class SourceChoices(models.IntegerChoices): ONLY_IN_SINEQUA_CONFIGS = 3, "Only in Sinequa configs" -class ConnectorChoices(models.IntegerChoices): +class ConnectorChoices(ChoiceFieldMixin, models.IntegerChoices): CRAWLER2 = 1, "crawler2" JSON = 2, "json" HYPERINDEX = 3, "hyperindex" NO_CONNECTOR = 4, "No Connector" - @classmethod - def lookup_by_text(cls, text: str) -> int | None: - for choice in cls.choices: - if choice[1].lower() == text.lower(): - return choice[0] - return None - class CurationStatusChoices(models.IntegerChoices): NEEDS_SCRAPING = 1, "Needs Scraping" diff --git a/sde_collections/models/pattern.py b/sde_collections/models/pattern.py index 1e14042b..986de4c5 100644 --- a/sde_collections/models/pattern.py +++ b/sde_collections/models/pattern.py @@ -34,6 +34,8 @@ class MatchPatternTypeChoices(models.IntegerChoices): "CandidateURL", related_name="%(class)s_urls", ) + field_to_update = None # This should be overridden in subclasses + value_field = None # Field containing the value to set (e.g., 'document_type', 'division') def matched_urls(self): """Find all the urls matching the pattern.""" @@ -63,10 +65,28 @@ def _process_match_pattern(self) -> str: return processed_pattern def apply(self): - raise NotImplementedError + """Universal apply method that works with both direct fields and relationships""" + if self.field_to_update and not self.value_field: + raise NotImplementedError("Subclasses must define both field_to_update and value_field") + + matched_urls = self.matched_urls() + + if self.field_to_update: + # For patterns that update a field (DocumentType, Division) + value_to_set = getattr(self, self.value_field) + matched_urls.update(**{self.field_to_update: value_to_set}) + + # Create relationships for all patterns + self.candidate_urls.add(*matched_urls) def unapply(self): - raise NotImplementedError + """Universal unapply method""" + if self.field_to_update: + # For patterns that update a field + self.candidate_urls.update(**{self.field_to_update: None}) + + # Remove relationships for all patterns + self.candidate_urls.clear() def save(self, *args, **kwargs): """Save the pattern and apply it.""" @@ -89,47 +109,19 @@ def __str__(self): class ExcludePattern(BaseMatchPattern): reason = models.TextField("Reason for excluding", default="", blank=True) - - def apply(self) -> None: - matched_urls = self.matched_urls() - candidate_url_ids = list(matched_urls.values_list("id", flat=True)) - self.candidate_urls.through.objects.bulk_create( - objs=[ - ExcludePattern.candidate_urls.through(candidateurl_id=candidate_url_id, excludepattern_id=self.id) - for candidate_url_id in candidate_url_ids - ] - ) - - def unapply(self) -> None: - "Unapplies automatically by deleting include pattern through objects in a cascade" - return + # No field_to_update needed - uses relationships only class Meta: - """Meta definition for ExcludePattern.""" - verbose_name = "Exclude Pattern" verbose_name_plural = "Exclude Patterns" unique_together = ("collection", "match_pattern") class IncludePattern(BaseMatchPattern): - def apply(self) -> None: - matched_urls = self.matched_urls() - candidate_url_ids = list(matched_urls.values_list("id", flat=True)) - self.candidate_urls.through.objects.bulk_create( - objs=[ - IncludePattern.candidate_urls.through(candidateurl_id=candidate_url_id, includepattern_id=self.id) - for candidate_url_id in candidate_url_ids - ] - ) - - def unapply(self) -> None: - "Unapplies automatically by deleting includepattern through objects in a cascade" - return + # No field_to_update needed - uses relationships only + pass class Meta: - """Meta definition for IncludePattern.""" - verbose_name = "Include Pattern" verbose_name_plural = "Include Patterns" unique_together = ("collection", "match_pattern") @@ -225,26 +217,10 @@ class Meta: class DocumentTypePattern(BaseMatchPattern): document_type = models.IntegerField(choices=DocumentTypes.choices) - - def apply(self) -> None: - matched_urls = self.matched_urls() - matched_urls.update(document_type=self.document_type) - candidate_url_ids = list(matched_urls.values_list("id", flat=True)) - self.candidate_urls.through.objects.bulk_create( - objs=[ - DocumentTypePattern.candidate_urls.through( - candidateurl_id=candidate_url_id, documenttypepattern_id=self.id - ) - for candidate_url_id in candidate_url_ids - ] - ) - - def unapply(self) -> None: - self.candidate_urls.update(document_type=None) + field_to_update = 'document_type' + value_field = 'document_type' class Meta: - """Meta definition for DocumentTypePattern.""" - verbose_name = "Document Type Pattern" verbose_name_plural = "Document Type Patterns" unique_together = ("collection", "match_pattern") @@ -252,20 +228,8 @@ class Meta: class DivisionPattern(BaseMatchPattern): division = models.IntegerField(choices=Divisions.choices) - - def apply(self) -> None: - matched_urls = self.matched_urls() - matched_urls.update(division=self.division) - candidate_url_ids = list(matched_urls.values_list("id", flat=True)) - self.candidate_urls.through.objects.bulk_create( - objs=[ - DivisionPattern.candidate_urls.through(candidateurl_id=candidate_url_id, divisionpattern_id=self.id) - for candidate_url_id in candidate_url_ids - ] - ) - - def unapply(self) -> None: - self.candidate_urls.update(division=None) + field_to_update = 'division' + value_field = 'division' class Meta: verbose_name = "Division Pattern" @@ -277,3 +241,8 @@ class Meta: # def post_save_handler(sender, instance, created, **kwargs): # if created: # transaction.on_commit(lambda: resolve_title_pattern.delay(instance.pk)) + + + + +#To test: \ No newline at end of file diff --git a/sde_collections/serializers.py b/sde_collections/serializers.py index 9623e85d..6e5f5fb2 100644 --- a/sde_collections/serializers.py +++ b/sde_collections/serializers.py @@ -35,12 +35,6 @@ class Meta: "name": {"required": False}, } - # extra_kwargs = { - # "name": {"required": False}, - # "config_folder": {"required": False}, - # "division": {"required": False}, - # } - class CollectionReadSerializer(serializers.ModelSerializer): class Meta: @@ -127,25 +121,19 @@ class Meta: def get_document_type(self, obj): if obj.document_type is not None: return obj.get_document_type_display() - elif obj.collection.document_type is not None: - return obj.collection.get_document_type_display() - else: - return "Unknown" + return obj.collection.get_document_type_display() if obj.collection.document_type is not None else "Unknown" def get_title(self, obj): - return obj.generated_title if obj.generated_title else obj.scraped_title + return obj.generated_title or obj.scraped_title def get_file_extension(self, obj): return obj.fileext def get_tree_root(self, obj): if obj.collection.is_multi_division: - if obj.division: - return f"/{obj.get_division_display()}/{obj.collection.name}/" - else: - return f"/{obj.collection.get_division_display()}/{obj.collection.name}/" - else: - return obj.collection.tree_root + division = obj.get_division_display() if obj.division else obj.collection.get_division_display() + return f"/{division}/{obj.collection.name}/" + return obj.collection.tree_root class BasePatternSerializer(serializers.ModelSerializer): @@ -167,6 +155,20 @@ class Meta: abstract = True +def create_pattern_validator(model_class, match_pattern_type_choices): + def validate_match_pattern(self, value): + try: + pattern = model_class.objects.get( + match_pattern=value, + match_pattern_type=match_pattern_type_choices.INDIVIDUAL_URL, + ) + pattern.delete() + except model_class.DoesNotExist: + pass + return value + return validate_match_pattern + + class ExcludePatternSerializer(BasePatternSerializer, serializers.ModelSerializer): class Meta: model = ExcludePattern @@ -184,25 +186,13 @@ class Meta: model = TitlePattern fields = BasePatternSerializer.Meta.fields + ("title_pattern",) - def validate_match_pattern(self, value): - try: - title_pattern = TitlePattern.objects.get( - match_pattern=value, - match_pattern_type=TitlePattern.MatchPatternTypeChoices.INDIVIDUAL_URL, - ) - title_pattern.delete() - except TitlePattern.DoesNotExist: - pass - return value + validate_match_pattern = create_pattern_validator(TitlePattern, TitlePattern.MatchPatternTypeChoices) class DocumentTypePatternSerializer(BasePatternSerializer, serializers.ModelSerializer): document_type_display = serializers.CharField(source="get_document_type_display", read_only=True) document_type = serializers.ChoiceField( - choices=DocumentTypes.choices - + [ - (0, "None"), - ] + choices=DocumentTypes.choices + [(0, "None")] ) class Meta: @@ -212,17 +202,7 @@ class Meta: "document_type_display", ) - def validate_match_pattern(self, value): - try: - title_pattern = DocumentTypePattern.objects.get( - match_pattern=value, - match_pattern_type=DocumentTypePattern.MatchPatternTypeChoices.INDIVIDUAL_URL, - ) - title_pattern.delete() - except DocumentTypePattern.DoesNotExist: - pass - return value - + validate_match_pattern = create_pattern_validator(DocumentTypePattern, DocumentTypePattern.MatchPatternTypeChoices) class DivisionPatternSerializer(BasePatternSerializer, serializers.ModelSerializer): division_display = serializers.CharField(source="get_division_display", read_only=True) @@ -235,13 +215,4 @@ class Meta: "division_display", ) - def validate_match_pattern(self, value): - try: - division_pattern = DivisionPattern.objects.get( - match_pattern=value, - match_pattern_type=DivisionPattern.MatchPatternTypeChoices.INDIVIDUAL_URL, - ) - division_pattern.delete() - except DivisionPattern.DoesNotExist: - pass - return value + validate_match_pattern = create_pattern_validator(DivisionPattern, DivisionPattern.MatchPatternTypeChoices) \ No newline at end of file