Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: related article validation #698

Closed
Changes from 1 commit
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
71628cf
Adapta a classe de validação ao novo modelo
Rossi-Luciano Sep 12, 2024
8cd2d4e
Adapta os testes
Rossi-Luciano Sep 12, 2024
4f19b32
Adiciona validação para os atributos de 'related-article'
Rossi-Luciano Sep 13, 2024
e1c0ad1
Adiciona validação para evento em 'history'
Rossi-Luciano Sep 13, 2024
641094a
Adiciona testes
Rossi-Luciano Sep 13, 2024
7beefdc
Remove 'related_articles_matches_history_date_validation'
Rossi-Luciano Sep 14, 2024
8e9232f
Adapta os testes
Rossi-Luciano Sep 14, 2024
fdc7fb0
Refatora e complementa validação de 'errata'
Rossi-Luciano Sep 22, 2024
e35d721
Adapta e adiciona testes
Rossi-Luciano Sep 22, 2024
5cb9511
Remove 'validation' de 'title'
Rossi-Luciano Sep 22, 2024
96a3999
Adapta os testes
Rossi-Luciano Sep 22, 2024
d3facd9
Refatora validação de 'errata'
Rossi-Luciano Sep 24, 2024
530ad59
Adapta os testes
Rossi-Luciano Sep 24, 2024
80664ef
Adiciona argumento "related_article_type"
Rossi-Luciano Sep 30, 2024
078c2b8
Adiciona teste
Rossi-Luciano Sep 30, 2024
3c9f8ef
Remove o módulo de validação de 'preprint'
Rossi-Luciano Oct 9, 2024
a1fc865
Remove o módulo de validação de 'errata'
Rossi-Luciano Oct 9, 2024
35158fc
Corrige a extensão dos arquivos de listas controladas
Rossi-Luciano Oct 9, 2024
2af892d
Adiciona 'related_article.json'
Rossi-Luciano Oct 9, 2024
14f341f
Adiciona 'related_article_type_date_type.json'
Rossi-Luciano Oct 9, 2024
619c161
Renomeia classe
Rossi-Luciano Oct 9, 2024
acfb059
Adiciona 'remove_namespaces()'
Rossi-Luciano Oct 9, 2024
6f6b968
Aplica formatação
Rossi-Luciano Oct 9, 2024
cfac0fa
Adiciona 'full_tag'
Rossi-Luciano Oct 9, 2024
626fdd3
Aplica formatação
Rossi-Luciano Oct 9, 2024
df03ab3
Corrige importações
Rossi-Luciano Oct 9, 2024
f919184
Corrige e adiciona 'docstring'
Rossi-Luciano Oct 9, 2024
c1a45b2
Remove validação de atributos (substituída pela validação de ordem)
Rossi-Luciano Oct 9, 2024
2d946ec
Adiciona 'validate_history_date'
Rossi-Luciano Oct 9, 2024
fdc7e89
Adiciona validação de ordem dos atributos
Rossi-Luciano Oct 9, 2024
3d347e6
Adiciona validação de DOI
Rossi-Luciano Oct 9, 2024
4d373d3
Adiciona 'validate'
Rossi-Luciano Oct 9, 2024
47c4598
Adiciona 'validate_related_article_matches_article_type'
Rossi-Luciano Oct 9, 2024
cbb03fe
Adapta testes de artigos relacionados
Rossi-Luciano Oct 9, 2024
e324186
Adapta testes de 'errata'
Rossi-Luciano Oct 9, 2024
5946418
Adapta testes de 'preprint'
Rossi-Luciano Oct 9, 2024
aeea305
Merge branch 'master' into feat_realated_article_validation
Rossi-Luciano Oct 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 78 additions & 72 deletions packtools/sps/validation/errata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,33 @@
from packtools.sps.models.article_dates import HistoryDates


def _get_related_articles(xml_tree, expected_related_article_type):
    """
    Select the related articles whose type matches the expected one.

    Args:
        xml_tree: XML tree handed to ``RelatedItems`` to obtain the related articles.
        expected_related_article_type (str): Value compared against each item's
            ``"related-article-type"`` entry.

    Returns:
        list: The items from ``RelatedItems(xml_tree).related_articles`` whose
        ``"related-article-type"`` equals ``expected_related_article_type``.
        Items of any other type are left out of the result.
    """
    return [
        article for article in RelatedItems(xml_tree).related_articles
        if article.get("related-article-type") == expected_related_article_type
    ]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Rossi-Luciano será que o melhor não seria remover o filtro e iterar em todos os related-article? Da forma atual, não se está escondendo possíveis defeitos no XML ao ignorar os related-article de tipos inesperados?



def _format_obtained(related_article):
return (
f'<related-article ext-link-type="{related_article.get("ext-link-type")}" '
f'id="{related_article.get("id")}" related-article-type="{related_article.get("related-article-type")}" '
f'xlink:href="{related_article.get("href")}"/>'
)


class ValidationBase:
def __init__(self, xml_tree, expected_article_type, expected_related_article_type):
self.xml_tree = xml_tree
self.article_lang = xml_tree.get("{http://www.w3.org/XML/1998/namespace}lang")
self.article_type = xml_tree.find(".").get("article-type")
self.expected_article_type = expected_article_type
self.expected_related_article_type = expected_related_article_type
self.related_articles = self._get_related_articles()
self.related_articles = _get_related_articles(xml_tree, expected_related_article_type)

def validate_related_article(self, title, error_level="ERROR"):
"""
Validates the related articles against the expected type and other criteria.

Args:
error_level (str, optional): The error level for the validation response. Defaults to "ERROR".

Yields:
dict: A formatted response indicating whether the validation passed or failed.
"""
if self.article_type != self.expected_article_type:
return
Expand All @@ -40,7 +49,7 @@ def validate_related_article(self, title, error_level="ERROR"):
validation_type="match",
is_valid=True,
expected=expected_response,
obtained=self._format_obtained(related_article),
obtained=_format_obtained(related_article),
advice=None,
data=related_article,
error_level=error_level
Expand All @@ -65,87 +74,84 @@ def validate_related_article(self, title, error_level="ERROR"):
error_level=error_level
)

def _get_related_articles(self,):
return [
article for article in RelatedItems(self.xml_tree).related_articles
if article.get("related-article-type") == self.expected_related_article_type
]
def validate_history_dates(self, expected_history_event, error_level="ERROR"):
"""
Validates that the number of related articles matches the number of corresponding corrected dates.
"""
history_data = list(HistoryDates(self.xml_tree).history_dates())
history_dates = [date for date in history_data if expected_history_event in date.get("history")]
history_date_count = len(history_dates)
related_article_count = len(self.related_articles)

if history_date_count < related_article_count:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Rossi-Luciano não acho boa a abordagem de contagem. A abordagem a ser adotada deveria ser pareada, porque o objetivo não é saber se a quantidade bate, mas sim qual é o item que está com a data faltando e, vice-versa, se há uma data no histórico sem o related-article correspondente.

yield format_response(
title="validation related and corrected dates count",
parent="article",
parent_id=None,
parent_article_type=self.article_type,
parent_lang=self.article_lang,
item="related-article",
sub_item="@related-article-type",
validation_type="exist",
is_valid=False,
expected=f'equal numbers of <related-article type="{self.expected_related_article_type}"> and <date type="{expected_history_event}">',
obtained=f'{related_article_count} <related-article type="{self.expected_related_article_type}"> and {history_date_count} <date type="{expected_history_event}">',
advice=f'for each <related-article type="{self.expected_related_article_type}">, there must be a corresponding <date type="{expected_history_event}"> in <history>',
data=history_data,
error_level=error_level,
)

def _format_obtained(self, related_article):
return (
f'<related-article ext-link-type="{related_article.get("ext-link-type")}" '
f'id="{related_article.get("id")}" related-article-type="{related_article.get("related-article-type")}" '
f'xlink:href="{related_article.get("href")}"/>'
)

class SpecificValidation(ValidationBase):
"""
Base class for specific validations to handle common functionality for Errata, ArticleCorrected,
ArticleRetracted, and ArticlePartiallyRetracted validations.
"""

class ErrataValidation(ValidationBase):
def __init__(self, xml_tree, expected_article_type, expected_related_article_type):
super().__init__(xml_tree, expected_article_type, expected_related_article_type)

def validate_related_article(self, error_level="ERROR", title="validation matching 'correction' and 'corrected-article'"):
def validate_related_article(self, error_level="ERROR", title=None):
"""
Validates related articles specifically for corrected articles.

Args:
error_level (str, optional): The error level for the validation response. Defaults to "ERROR".
Common logic for validating related articles, where `title` must be provided by subclasses.
"""
if title is None:
raise ValueError("Title must be provided for the validation.")
yield from super().validate_related_article(title=title, error_level=error_level)

Yields:
dict: A formatted response indicating whether the validation passed or failed.
def validate_history_dates(self, error_level="ERROR", expected_history_event=None):
"""
yield from super().validate_related_article(error_level=error_level, title=title)
Common logic for validating history dates, where `expected_history_event` must be provided by subclasses.
"""
if expected_history_event is None:
raise ValueError("Expected history event must be provided.")
yield from super().validate_history_dates(expected_history_event=expected_history_event, error_level=error_level)


class CorrectedArticleValidation(ValidationBase):
def __init__(self, xml_tree, expected_article_type, expected_related_article_type):
super().__init__(xml_tree, expected_article_type, expected_related_article_type)
self.history_dates = self._get_history_dates()
class ErrataValidation(SpecificValidation):
    """
    Related-article validation that supplies the default title for the
    'correction' / 'corrected-article' match.
    """

    def validate_related_article(self, error_level="ERROR", title="validation matching 'correction' and 'corrected-article'"):
        """
        Delegate to the parent validation using this class's default title.

        Args:
            error_level (str, optional): The error level for the validation response. Defaults to "ERROR".
            title (str, optional): Title passed on to the parent validation.

        Yields:
            Whatever the parent ``validate_related_article`` yields.
        """
        yield from super().validate_related_article(error_level=error_level, title=title)


class ArticleCorrectedValidation(SpecificValidation):
def validate_related_article(self, error_level="ERROR", title="validation matching 'correction' and 'correction-forward'"):
"""
Validates related articles specifically for corrected articles.
yield from super().validate_related_article(error_level=error_level, title=title)

Args:
error_level (str, optional): The error level for the validation response. Defaults to "ERROR".
def validate_history_dates(self, error_level="ERROR", expected_history_event="corrected"):
yield from super().validate_history_dates(error_level=error_level, expected_history_event=expected_history_event)

Yields:
dict: A formatted response indicating whether the validation passed or failed.
"""
yield from super().validate_related_article(error_level=error_level, title=title)

def validate_history_dates(self, error_level="ERROR"):
"""
Validates that the number of related articles matches the number of corresponding corrected dates.
class ArticleRetractedInFullValidation(SpecificValidation):
def validate_related_article(self, error_level="ERROR", title="validation matching 'retraction' and 'retracted-article'"):
yield from super().validate_related_article(error_level=error_level, title=title)

Args:
error_level (str, optional): The error level for the validation response. Defaults to "ERROR".
def validate_history_dates(self, error_level="ERROR", expected_history_event="retracted"):
yield from super().validate_history_dates(error_level=error_level, expected_history_event=expected_history_event)

Yields:
dict: A formatted response indicating whether the validation passed or failed.
"""
history_date_count = len(self.history_dates)
related_article_count = len(self.related_articles)

if history_date_count < related_article_count:
yield format_response(
title="validation related and corrected dates count",
parent="article",
parent_id=None,
parent_article_type=self.article_type,
parent_lang=self.article_lang,
item="related-article",
sub_item="@related-article-type",
validation_type="exist",
is_valid=False,
expected='equal numbers of <related-article type="correction-forward"> and <date type="corrected">',
obtained=f'{related_article_count} <related-article type="correction-forward"> and {history_date_count} <date type="corrected">',
advice='for each <related-article type="correction-forward">, there must be a corresponding <date type="corrected"> in <history>',
data=self.history_dates,
error_level=error_level,
)
class ArticlePartiallyRetractedValidation(SpecificValidation):
def validate_related_article(self, error_level="ERROR", title="validation matching 'retraction' and 'partial-retraction'"):
yield from super().validate_related_article(error_level=error_level, title=title)

def _get_history_dates(self):
return [
date for date in HistoryDates(self.xml_tree).history_dates()
if "corrected" in date.get("history")
]
def validate_history_dates(self, error_level="ERROR", expected_history_event="retracted"):
yield from super().validate_history_dates(error_level=error_level, expected_history_event=expected_history_event)