Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: related article validation #698

Open
wants to merge 37 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
71628cf
Adapta a classe de validação ao novo modelo
Rossi-Luciano Sep 12, 2024
8cd2d4e
Adapta os testes
Rossi-Luciano Sep 12, 2024
4f19b32
Adiciona validação para os atributos de 'related-article'
Rossi-Luciano Sep 13, 2024
e1c0ad1
Adiciona validação para evento em 'history'
Rossi-Luciano Sep 13, 2024
641094a
Adiciona testes
Rossi-Luciano Sep 13, 2024
7beefdc
Remove 'related_articles_matches_history_date_validation'
Rossi-Luciano Sep 14, 2024
8e9232f
Adapta os testes
Rossi-Luciano Sep 14, 2024
fdc7fb0
Refatora e complementa validação de 'errata'
Rossi-Luciano Sep 22, 2024
e35d721
Adapta e adiciona testes
Rossi-Luciano Sep 22, 2024
5cb9511
Remove 'validation' de 'title'
Rossi-Luciano Sep 22, 2024
96a3999
Adapta os testes
Rossi-Luciano Sep 22, 2024
d3facd9
Refatora validação de 'errata'
Rossi-Luciano Sep 24, 2024
530ad59
Adapta os testes
Rossi-Luciano Sep 24, 2024
80664ef
Adiciona argumento "related_article_type"
Rossi-Luciano Sep 30, 2024
078c2b8
Adiciona teste
Rossi-Luciano Sep 30, 2024
3c9f8ef
Remove o módulo de validação de 'preprint'
Rossi-Luciano Oct 9, 2024
a1fc865
Remove o módulo de validação de 'errata'
Rossi-Luciano Oct 9, 2024
35158fc
Corrige a extensão dos arquivos de listas controladas
Rossi-Luciano Oct 9, 2024
2af892d
Adiciona 'related_article.json'
Rossi-Luciano Oct 9, 2024
14f341f
Adiciona 'related_article_type_date_type.json'
Rossi-Luciano Oct 9, 2024
619c161
Renomeia classe
Rossi-Luciano Oct 9, 2024
acfb059
Adiciona 'remove_namespaces()'
Rossi-Luciano Oct 9, 2024
6f6b968
Aplica formatação
Rossi-Luciano Oct 9, 2024
cfac0fa
Adiciona 'full_tag'
Rossi-Luciano Oct 9, 2024
626fdd3
Aplica formatação
Rossi-Luciano Oct 9, 2024
df03ab3
Corrige importações
Rossi-Luciano Oct 9, 2024
f919184
Corrige e adiciona 'docstring'
Rossi-Luciano Oct 9, 2024
c1a45b2
Remove validação de atributos (substituída pela validação de ordem)
Rossi-Luciano Oct 9, 2024
2d946ec
Adiciona 'validate_history_date'
Rossi-Luciano Oct 9, 2024
fdc7e89
Adiciona validação de ordem dos atributos
Rossi-Luciano Oct 9, 2024
3d347e6
Adiciona validação de DOI
Rossi-Luciano Oct 9, 2024
4d373d3
Adiciona 'validate'
Rossi-Luciano Oct 9, 2024
47c4598
Adiciona 'validate_related_article_matches_article_type'
Rossi-Luciano Oct 9, 2024
cbb03fe
Adapta testes de artigos relacionados
Rossi-Luciano Oct 9, 2024
e324186
Adapta testes de 'errata'
Rossi-Luciano Oct 9, 2024
5946418
Adapta testes de 'preprint'
Rossi-Luciano Oct 9, 2024
aeea305
Merge branch 'master' into feat_realated_article_validation
Rossi-Luciano Oct 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 43 additions & 126 deletions packtools/sps/validation/errata.py
Original file line number Diff line number Diff line change
@@ -1,157 +1,74 @@
from packtools.sps.validation.utils import format_response
from packtools.sps.models.related_articles import RelatedItems
from packtools.sps.models.article_dates import HistoryDates
from packtools.sps.models.v2.related_articles import RelatedArticles
from packtools.sps.models.article_dates import ArticleDates


def _get_related_articles(xml_tree, expected_related_article_type):
    """Collect the related articles whose type equals the expected one.

    Walks ``RelatedItems(xml_tree).related_articles`` and keeps every item
    whose ``"related-article-type"`` value equals
    ``expected_related_article_type``.

    Returns:
        list: the matching items, in iteration order.
    """
    selected = []
    for candidate in RelatedItems(xml_tree).related_articles:
        if candidate.get("related-article-type") == expected_related_article_type:
            selected.append(candidate)
    return selected

class RelatedArticlesValidation:
def __init__(self, xml_tree, correspondence_list):
self.xml_tree = xml_tree
self.correspondence_list = correspondence_list
self.article_type = xml_tree.find(".").get("article-type")
self.related_articles = list(RelatedArticles(xml_tree).related_articles())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Rossi-Luciano aqui estão sendo recuperados todos os related_articles. Em RelatedArticles, crie um método para filtrar somente os related_articles com os quais deseja trabalhar, em vez de criar em RelatedArticlesValidation o método get_related_article_types_by_article_type.

self.history_dates = ArticleDates(xml_tree).history_dates_dict

def _format_obtained(related_article):
return (
f'<related-article ext-link-type="{related_article.get("ext-link-type")}" '
f'id="{related_article.get("id")}" related-article-type="{related_article.get("related-article-type")}" '
f'xlink:href="{related_article.get("href")}"/>'
)
def get_related_article_types_by_article_type(self, obtained_article_type):
    """Return the related-article types paired with the given article type.

    Scans ``self.correspondence_list`` and gathers the
    ``'related-article-type'`` value of every entry whose ``'article-type'``
    equals ``obtained_article_type``.

    Returns:
        set: the distinct related-article types found (empty if none match).
    """
    matched_types = set()
    for entry in self.correspondence_list:
        if entry['article-type'] == obtained_article_type:
            matched_types.add(entry['related-article-type'])
    return matched_types

def get_related_article_types(self):
    """Return the distinct ``'related-article-type'`` values found in ``self.related_articles``."""
    found_types = set()
    for related in self.related_articles:
        found_types.add(related['related-article-type'])
    return found_types

class ValidationBase:
def __init__(self, xml_tree, expected_article_type, expected_related_article_type):
    """Store the tree, the expected types and the matching related articles.

    Args:
        xml_tree: parsed XML document; must provide ``find``.
        expected_article_type: value compared against the root element's
            ``article-type`` attribute by the validations.
        expected_related_article_type: ``related-article-type`` used to
            select the related articles kept in ``self.related_articles``.
    """
    self.xml_tree = xml_tree
    # Resolve the root element once and read both attributes from it.
    # The original read ``xml:lang`` with ``xml_tree.get`` but
    # ``article-type`` with ``xml_tree.find(".").get``; the former fails
    # when ``xml_tree`` is an ElementTree, which has ``find`` but no ``get``.
    root = xml_tree.find(".")
    self.article_lang = root.get("{http://www.w3.org/XML/1998/namespace}lang")
    self.article_type = root.get("article-type")
    self.expected_article_type = expected_article_type
    self.expected_related_article_type = expected_related_article_type
    self.related_articles = _get_related_articles(xml_tree, expected_related_article_type)
def get_history_events_by_related_article_type(self):
    """Return the ``'date-type'`` values tied to the related-article types in use.

    For every entry of ``self.correspondence_list`` whose
    ``'related-article-type'`` is among ``self.get_related_article_types()``
    and whose ``'date-type'`` is truthy, the ``'date-type'`` is collected.

    Returns:
        set: the distinct date types collected.
    """
    types_in_use = self.get_related_article_types()
    required_events = set()
    for entry in self.correspondence_list:
        if entry['related-article-type'] not in types_in_use:
            continue
        # Entries with an empty/None date-type do not require a history event.
        if entry['date-type']:
            required_events.add(entry['date-type'])
    return required_events

def validate_related_article(self, title, error_level="ERROR"):
"""
Validates the related articles against the expected type and other criteria.
"""
if self.article_type != self.expected_article_type:
return
def get_history_events(self):
    """Return the keys of ``self.history_dates`` as a set."""
    event_names = set()
    event_names.update(self.history_dates.keys())
    return event_names

expected_response = f'at least one <related-article related-article-type="{self.expected_related_article_type}">'
def validate_related_articles(self, error_level="ERROR"):
expected_related_article_types = self.get_related_article_types_by_article_type(self.article_type)
obtained_related_article_types = self.get_related_article_types()

if self.related_articles:
yield from (
format_response(
title=title,
parent=related_article.get("parent"),
parent_id=related_article.get("parent_id"),
parent_article_type=related_article.get("parent_article_type"),
parent_lang=related_article.get("parent_lang"),
item="related-article",
sub_item="@related-article-type",
validation_type="match",
is_valid=True,
expected=expected_response,
obtained=_format_obtained(related_article),
advice=None,
data=related_article,
error_level=error_level
)
for related_article in self.related_articles
)
else:
missing_types = expected_related_article_types - obtained_related_article_types
if missing_types:
related_article_type = next(iter(missing_types))
yield format_response(
title=title,
title=f"matching '{self.article_type}' and '{related_article_type}'",
parent="article",
parent_id=None,
parent_article_type=self.article_type,
parent_lang=self.article_lang,
parent_lang=self.xml_tree.find(".").get("{http://www.w3.org/XML/1998/namespace}lang"),
item="related-article",
sub_item="@related-article-type",
validation_type="exist",
validation_type="match",
is_valid=False,
expected=expected_response,
expected=f'at least one <related-article related-article-type="{related_article_type}">',
obtained=None,
advice=f'provide <related-article related-article-type="{self.expected_related_article_type}">',
data=None,
advice=f'provide <related-article related-article-type="{related_article_type}">',
data=self.related_articles,
error_level=error_level
)

def validate_history_dates(self, expected_history_event, error_level="ERROR"):
"""
Validates that the number of related articles matches the number of corresponding corrected dates.
"""
history_data = list(HistoryDates(self.xml_tree).history_dates())
history_dates = [date for date in history_data if expected_history_event in date.get("history")]
history_date_count = len(history_dates)
related_article_count = len(self.related_articles)
def validate_history_events(self, error_level="ERROR"):
expected_history_events = self.get_history_events_by_related_article_type()
obtained_history_events = self.get_history_events()

if history_date_count < related_article_count:
missing_events = expected_history_events - obtained_history_events
if missing_events:
yield format_response(
title="related and corrected dates count",
title="exist historical date event for the related-article",
parent="article",
parent_id=None,
parent_article_type=self.article_type,
parent_lang=self.article_lang,
parent_lang=self.xml_tree.find(".").get("{http://www.w3.org/XML/1998/namespace}lang"),
item="related-article",
sub_item="@related-article-type",
validation_type="exist",
is_valid=False,
expected=f'equal numbers of <related-article type="{self.expected_related_article_type}"> and <date type="{expected_history_event}">',
obtained=f'{related_article_count} <related-article type="{self.expected_related_article_type}"> and {history_date_count} <date type="{expected_history_event}">',
advice=f'for each <related-article type="{self.expected_related_article_type}">, there must be a corresponding <date type="{expected_history_event}"> in <history>',
data=history_data,
expected=' '.join([f'<date date-type="{event}">' for event in missing_events]),
obtained=None,
advice='provide ' + ' '.join([f'<date date-type="{event}">' for event in missing_events]),
data=self.history_dates,
error_level=error_level,
)


class SpecificValidation(ValidationBase):
    """
    Shared layer for the Errata, ArticleCorrected, ArticleRetracted and
    ArticlePartiallyRetracted validations.

    It exposes ``title`` and ``expected_history_event`` as optional keyword
    arguments, rejects ``None`` with ``ValueError`` and otherwise delegates to
    ``ValidationBase``.
    """

    def __init__(self, xml_tree, expected_article_type, expected_related_article_type):
        super().__init__(
            xml_tree,
            expected_article_type,
            expected_related_article_type,
        )

    def validate_related_article(self, error_level="ERROR", title=None):
        """
        Delegate to ``ValidationBase.validate_related_article``.

        Raises:
            ValueError: when iteration starts and ``title`` is ``None``;
                subclasses are expected to supply it.
        """
        if title is not None:
            yield from super().validate_related_article(title=title, error_level=error_level)
            return
        raise ValueError("Title must be provided for the validation.")

    def validate_history_dates(self, error_level="ERROR", expected_history_event=None):
        """
        Delegate to ``ValidationBase.validate_history_dates``.

        Raises:
            ValueError: when iteration starts and ``expected_history_event``
                is ``None``; subclasses are expected to supply it.
        """
        if expected_history_event is not None:
            yield from super().validate_history_dates(
                expected_history_event=expected_history_event,
                error_level=error_level,
            )
            return
        raise ValueError("Expected history event must be provided.")


class ErrataValidation(SpecificValidation):
    """Related-article validation with an errata-specific default title."""

    def validate_related_article(self, error_level="ERROR", title="matching 'correction' and 'corrected-article'"):
        """Yield the parent's related-article results using this class's default ``title``."""
        parent_results = super().validate_related_article(
            title=title,
            error_level=error_level,
        )
        yield from parent_results


class ArticleCorrectedValidation(SpecificValidation):
    """Validation with defaults for corrected articles: a title and the "corrected" history event."""

    def validate_related_article(self, error_level="ERROR", title="matching 'correction' and 'correction-forward'"):
        """Yield the parent's related-article results using this class's default ``title``."""
        parent_results = super().validate_related_article(
            title=title,
            error_level=error_level,
        )
        yield from parent_results

    def validate_history_dates(self, error_level="ERROR", expected_history_event="corrected"):
        """Yield the parent's history-date results using this class's default event."""
        parent_results = super().validate_history_dates(
            expected_history_event=expected_history_event,
            error_level=error_level,
        )
        yield from parent_results


class ArticleRetractedInFullValidation(SpecificValidation):
    """Validation with defaults for fully retracted articles: a title and the "retracted" history event."""

    def validate_related_article(self, error_level="ERROR", title="matching 'retraction' and 'retracted-article'"):
        """Yield the parent's related-article results using this class's default ``title``."""
        parent_results = super().validate_related_article(
            title=title,
            error_level=error_level,
        )
        yield from parent_results

    def validate_history_dates(self, error_level="ERROR", expected_history_event="retracted"):
        """Yield the parent's history-date results using this class's default event."""
        parent_results = super().validate_history_dates(
            expected_history_event=expected_history_event,
            error_level=error_level,
        )
        yield from parent_results


class ArticlePartiallyRetractedValidation(SpecificValidation):
    """Validation with defaults for partially retracted articles: a title and the "retracted" history event."""

    def validate_related_article(self, error_level="ERROR", title="matching 'retraction' and 'partial-retraction'"):
        """Yield the parent's related-article results using this class's default ``title``."""
        parent_results = super().validate_related_article(
            title=title,
            error_level=error_level,
        )
        yield from parent_results

    def validate_history_dates(self, error_level="ERROR", expected_history_event="retracted"):
        """Yield the parent's history-date results using this class's default event."""
        parent_results = super().validate_history_dates(
            expected_history_event=expected_history_event,
            error_level=error_level,
        )
        yield from parent_results