Skip to content

Commit

Permalink
Issue #1063
Browse files Browse the repository at this point in the history
  • Loading branch information
Your Name committed Nov 5, 2024
1 parent 88eeb0d commit 2fddcdb
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 149 deletions.
11 changes: 3 additions & 8 deletions sde_collections/models/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,14 +460,9 @@ def sync_with_production_webapp(self) -> None:

def apply_all_patterns(self) -> None:
    """Apply the collection's exclude, include, title and document-type patterns.

    The four pattern sets are visited in that fixed order and ``apply()`` is
    called exactly once on every pattern returned by each set's ``all()``.
    """
    # NOTE(review): division patterns are not part of this list - confirm
    # whether that omission is intentional.
    pattern_sets = (
        self.excludepattern,
        self.includepattern,
        self.titlepattern,
        self.documenttypepattern,
    )
    for pattern_set in pattern_sets:
        for pattern in pattern_set.all():
            pattern.apply()

def save(self, *args, **kwargs):
# Call the function to generate the value for the generated_field based on the original_field
Expand Down
37 changes: 13 additions & 24 deletions sde_collections/models/collection_choice_fields.py
Original file line number Diff line number Diff line change
@@ -1,63 +1,52 @@
from django.db import models


# NOTE(review): a second ``Divisions`` class (based on ChoiceFieldMixin) is
# declared later in this listing and rebinds the name. This mixin-less variant
# looks like the superseded pre-change definition - confirm before removing it.
class Divisions(models.IntegerChoices):
ASTROPHYSICS = 1, "Astrophysics"
BIOLOGY = 2, "Biological and Physical Sciences"
EARTH_SCIENCE = 3, "Earth Science"
HELIOPHYSICS = 4, "Heliophysics"
PLANETARY = 5, "Planetary Science"
GENERAL = 6, "General"

class ChoiceFieldMixin:
@classmethod
def lookup_by_text(cls, text: str) -> int | None:
"""Common lookup functionality for all choice fields"""
for choice in cls.choices:
if choice[1].lower() == text.lower():
return choice[0]
return None


class Divisions(ChoiceFieldMixin, models.IntegerChoices):
    """Integer choices for divisions; label lookup comes from ChoiceFieldMixin."""

    ASTROPHYSICS = 1, "Astrophysics"
    BIOLOGY = 2, "Biological and Physical Sciences"
    EARTH_SCIENCE = 3, "Earth Science"
    HELIOPHYSICS = 4, "Heliophysics"
    PLANETARY = 5, "Planetary Science"
    GENERAL = 6, "General"


class UpdateFrequencies(models.IntegerChoices):
    """Integer choices for update frequencies (daily through monthly)."""

    DAILY = 1, "Daily"
    WEEKLY = 2, "Weekly"
    BIWEEKLY = 3, "Biweekly"
    MONTHLY = 4, "Monthly"


class DocumentTypes(ChoiceFieldMixin, models.IntegerChoices):
    """Integer choices for document types.

    ``lookup_by_text`` is inherited from ChoiceFieldMixin, so no per-class
    copy of the lookup is kept here.
    """

    IMAGES = 1, "Images"
    DATA = 2, "Data"
    DOCUMENTATION = 3, "Documentation"
    SOFTWARETOOLS = 4, "Software and Tools"
    MISSIONSINSTRUMENTS = 5, "Missions and Instruments"


class SourceChoices(models.IntegerChoices):
    """Integer choices recording where an entry is present: original, Sinequa configs, or both."""

    ONLY_IN_ORIGINAL = 1, "Only in original"
    BOTH = 2, "Both"
    ONLY_IN_SINEQUA_CONFIGS = 3, "Only in Sinequa configs"


class ConnectorChoices(ChoiceFieldMixin, models.IntegerChoices):
    """Integer choices for connectors.

    ``lookup_by_text`` is inherited from ChoiceFieldMixin, so no per-class
    copy of the lookup is kept here.
    """

    CRAWLER2 = 1, "crawler2"
    JSON = 2, "json"
    HYPERINDEX = 3, "hyperindex"
    NO_CONNECTOR = 4, "No Connector"


class CurationStatusChoices(models.IntegerChoices):
NEEDS_SCRAPING = 1, "Needs Scraping"
Expand Down
99 changes: 34 additions & 65 deletions sde_collections/models/pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ class MatchPatternTypeChoices(models.IntegerChoices):
"CandidateURL",
related_name="%(class)s_urls",
)
field_to_update = None # This should be overridden in subclasses
value_field = None # Field containing the value to set (e.g., 'document_type', 'division')

def matched_urls(self):
"""Find all the urls matching the pattern."""
Expand Down Expand Up @@ -63,10 +65,28 @@ def _process_match_pattern(self) -> str:
return processed_pattern

def apply(self):
    """Apply this pattern to every URL it matches.

    When ``field_to_update`` is set, the attribute named by ``value_field``
    is read from this pattern and written to that field on all matched URLs.
    In every case the matched URLs are then added to ``candidate_urls``.

    Raises:
        NotImplementedError: if only one of ``field_to_update`` and
            ``value_field`` is defined.
    """
    # Both attributes must be configured together (or both left unset for
    # relationship-only patterns), as the error message states.
    if bool(self.field_to_update) != bool(self.value_field):
        raise NotImplementedError("Subclasses must define both field_to_update and value_field")

    matched_urls = self.matched_urls()

    if self.field_to_update:
        # For patterns that update a field (DocumentType, Division)
        value_to_set = getattr(self, self.value_field)
        matched_urls.update(**{self.field_to_update: value_to_set})

    # Create relationships for all patterns
    self.candidate_urls.add(*matched_urls)

def unapply(self):
    """Undo this pattern on the URLs currently linked through ``candidate_urls``.

    When ``field_to_update`` is set, that field is reset to ``None`` on the
    linked URLs; the relationship itself is then cleared in every case.
    """
    if self.field_to_update:
        # For patterns that update a field
        self.candidate_urls.update(**{self.field_to_update: None})

    # Remove relationships for all patterns
    self.candidate_urls.clear()

def save(self, *args, **kwargs):
"""Save the pattern and apply it."""
Expand All @@ -89,47 +109,19 @@ def __str__(self):

class ExcludePattern(BaseMatchPattern):
    """Match pattern carrying a free-text exclusion reason.

    It defines no ``field_to_update``, so the inherited ``apply()`` and
    ``unapply()`` are used as-is instead of per-class overrides.
    """

    reason = models.TextField("Reason for excluding", default="", blank=True)
    # No field_to_update needed - uses relationships only

    class Meta:
        """Meta definition for ExcludePattern."""

        verbose_name = "Exclude Pattern"
        verbose_name_plural = "Exclude Patterns"
        unique_together = ("collection", "match_pattern")


class IncludePattern(BaseMatchPattern):
    """Match pattern with no extra fields.

    It defines no ``field_to_update``, so the inherited ``apply()`` and
    ``unapply()`` are used as-is instead of per-class overrides.
    """

    # No field_to_update needed - uses relationships only

    class Meta:
        """Meta definition for IncludePattern."""

        verbose_name = "Include Pattern"
        verbose_name_plural = "Include Patterns"
        unique_together = ("collection", "match_pattern")
Expand Down Expand Up @@ -225,47 +217,19 @@ class Meta:

class DocumentTypePattern(BaseMatchPattern):
    """Match pattern that carries a ``document_type`` value.

    ``field_to_update`` and ``value_field`` both name ``document_type``; the
    inherited ``apply()``/``unapply()`` are used instead of per-class overrides.
    """

    document_type = models.IntegerField(choices=DocumentTypes.choices)
    field_to_update = "document_type"  # field written on the matched URLs
    value_field = "document_type"  # attribute on this pattern holding the value

    class Meta:
        """Meta definition for DocumentTypePattern."""

        verbose_name = "Document Type Pattern"
        verbose_name_plural = "Document Type Patterns"
        unique_together = ("collection", "match_pattern")


class DivisionPattern(BaseMatchPattern):
division = models.IntegerField(choices=Divisions.choices)

def apply(self) -> None:
matched_urls = self.matched_urls()
matched_urls.update(division=self.division)
candidate_url_ids = list(matched_urls.values_list("id", flat=True))
self.candidate_urls.through.objects.bulk_create(
objs=[
DivisionPattern.candidate_urls.through(candidateurl_id=candidate_url_id, divisionpattern_id=self.id)
for candidate_url_id in candidate_url_ids
]
)

def unapply(self) -> None:
self.candidate_urls.update(division=None)
field_to_update = 'division'
value_field = 'division'

class Meta:
verbose_name = "Division Pattern"
Expand All @@ -277,3 +241,8 @@ class Meta:
# def post_save_handler(sender, instance, created, **kwargs):
# if created:
# transaction.on_commit(lambda: resolve_title_pattern.delay(instance.pk))




#To test:
75 changes: 23 additions & 52 deletions sde_collections/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,6 @@ class Meta:
"name": {"required": False},
}

# extra_kwargs = {
# "name": {"required": False},
# "config_folder": {"required": False},
# "division": {"required": False},
# }


class CollectionReadSerializer(serializers.ModelSerializer):
class Meta:
Expand Down Expand Up @@ -127,25 +121,19 @@ class Meta:
def get_document_type(self, obj):
    """Return the display label for ``obj``'s document type.

    Uses the object's own ``document_type`` when it is not ``None``, otherwise
    the collection's; returns ``"Unknown"`` when neither is set.
    """
    if obj.document_type is not None:
        return obj.get_document_type_display()
    if obj.collection.document_type is not None:
        return obj.collection.get_document_type_display()
    return "Unknown"

def get_title(self, obj):
    """Return ``obj.generated_title`` when it is truthy, else ``obj.scraped_title``."""
    return obj.generated_title or obj.scraped_title

def get_file_extension(self, obj):
    """Return the ``fileext`` attribute of ``obj`` unchanged."""
    extension = obj.fileext
    return extension

def get_tree_root(self, obj):
    """Return the tree root path for ``obj``.

    For a collection that is not multi-division this is the collection's own
    ``tree_root``. Otherwise the path is ``/<division>/<collection name>/``,
    where the division label comes from ``obj`` when it has a truthy
    ``division`` and from the collection otherwise.
    """
    collection = obj.collection
    if not collection.is_multi_division:
        return collection.tree_root
    division_source = obj if obj.division else collection
    return f"/{division_source.get_division_display()}/{collection.name}/"


class BasePatternSerializer(serializers.ModelSerializer):
Expand All @@ -167,6 +155,20 @@ class Meta:
abstract = True


def create_pattern_validator(model_class, match_pattern_type_choices):
    """Build a ``validate_match_pattern`` method for a pattern serializer.

    The returned validator deletes every existing ``model_class`` row whose
    ``match_pattern`` equals the incoming value and whose type is
    ``INDIVIDUAL_URL``, then returns the value unchanged.

    Args:
        model_class: pattern model exposing ``objects.filter(...)``.
        match_pattern_type_choices: choices class providing ``INDIVIDUAL_URL``.

    Returns:
        A function with the ``(self, value)`` signature of a serializer
        field validator.
    """

    def validate_match_pattern(self, value):
        # filter() instead of get(): get() raises MultipleObjectsReturned when
        # several rows match, which the previous DoesNotExist handler did not
        # cover. Instances are deleted one by one so any model-level delete()
        # logic still runs.
        # NOTE(review): the lookup is not scoped to a collection - confirm that
        # removing same-URL patterns across collections is intended.
        existing_patterns = model_class.objects.filter(
            match_pattern=value,
            match_pattern_type=match_pattern_type_choices.INDIVIDUAL_URL,
        )
        for pattern in existing_patterns:
            pattern.delete()
        return value

    return validate_match_pattern


class ExcludePatternSerializer(BasePatternSerializer, serializers.ModelSerializer):
class Meta:
model = ExcludePattern
Expand All @@ -184,25 +186,13 @@ class Meta:
model = TitlePattern
fields = BasePatternSerializer.Meta.fields + ("title_pattern",)

def validate_match_pattern(self, value):
try:
title_pattern = TitlePattern.objects.get(
match_pattern=value,
match_pattern_type=TitlePattern.MatchPatternTypeChoices.INDIVIDUAL_URL,
)
title_pattern.delete()
except TitlePattern.DoesNotExist:
pass
return value
validate_match_pattern = create_pattern_validator(TitlePattern, TitlePattern.MatchPatternTypeChoices)


class DocumentTypePatternSerializer(BasePatternSerializer, serializers.ModelSerializer):
document_type_display = serializers.CharField(source="get_document_type_display", read_only=True)
document_type = serializers.ChoiceField(
choices=DocumentTypes.choices
+ [
(0, "None"),
]
choices=DocumentTypes.choices + [(0, "None")]
)

class Meta:
Expand All @@ -212,17 +202,7 @@ class Meta:
"document_type_display",
)

def validate_match_pattern(self, value):
try:
title_pattern = DocumentTypePattern.objects.get(
match_pattern=value,
match_pattern_type=DocumentTypePattern.MatchPatternTypeChoices.INDIVIDUAL_URL,
)
title_pattern.delete()
except DocumentTypePattern.DoesNotExist:
pass
return value

validate_match_pattern = create_pattern_validator(DocumentTypePattern, DocumentTypePattern.MatchPatternTypeChoices)

class DivisionPatternSerializer(BasePatternSerializer, serializers.ModelSerializer):
division_display = serializers.CharField(source="get_division_display", read_only=True)
Expand All @@ -235,13 +215,4 @@ class Meta:
"division_display",
)

def validate_match_pattern(self, value):
try:
division_pattern = DivisionPattern.objects.get(
match_pattern=value,
match_pattern_type=DivisionPattern.MatchPatternTypeChoices.INDIVIDUAL_URL,
)
division_pattern.delete()
except DivisionPattern.DoesNotExist:
pass
return value
validate_match_pattern = create_pattern_validator(DivisionPattern, DivisionPattern.MatchPatternTypeChoices)

0 comments on commit 2fddcdb

Please sign in to comment.