From 9bcb65be292bb7aaff5ae2286b80c55aedc35f0b Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Thu, 1 Aug 2024 02:42:38 -0300 Subject: [PATCH 1/5] refactor xml_pmc_aff for aff elements --- packtools/sps/formats/pmc.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/packtools/sps/formats/pmc.py b/packtools/sps/formats/pmc.py index 7a1374cf1..ccd67b024 100644 --- a/packtools/sps/formats/pmc.py +++ b/packtools/sps/formats/pmc.py @@ -43,14 +43,29 @@ def xml_pmc_aff(xml_tree): """ affs = xml_tree.findall(".//aff") for aff in affs: - aff_institution = aff.find("./institution[@content-type='original']").text + original_institution = aff.find("./institution[@content-type='original']") + if original_institution is not None: + aff_institution = original_institution.text + else: + aff_with_address = [] + aff_with_address.append(aff.find("./institution[@content-type='orgname']").text) + + addr_line = aff.find("./addr-line") + if addr_line is not None: + named_contents = addr_line.findall(".//named-content") + aff_with_address.extend([named_content.text for named_content in named_contents]) + + country = aff.find("./country") + if country is not None: + aff_with_address.append(country.text) + aff_institution = ", ".join(aff_with_address) + for institution in aff.findall(".//institution"): aff.remove(institution) - aff.remove(aff.find("./addr-line")) - - aff.remove(aff.find("./country")) + for element in [aff.find("./addr-line"), aff.find("./country")]: + aff.remove(element) node_label = aff.find("./label") @@ -154,3 +169,7 @@ def xml_pmc_ref(xml_tree): refs = xml_tree.findall(".//ref") for ref in refs: ref.remove(ref.find("./mixed-citation")) + + +def xml_pmc_refactor_pub_date(xmltree): + ... \ No newline at end of file From df5ccaa826554ca693f26961fe2282e77ed9e8dd Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 9 Aug 2024 15:04:01 -0300 Subject: [PATCH 2/5] remove funcao xml_pmc_refactor_pub_date --- packtools/sps/formats/pmc.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/packtools/sps/formats/pmc.py b/packtools/sps/formats/pmc.py index ccd67b024..9391789ee 100644 --- a/packtools/sps/formats/pmc.py +++ b/packtools/sps/formats/pmc.py @@ -170,6 +170,3 @@ def xml_pmc_ref(xml_tree): for ref in refs: ref.remove(ref.find("./mixed-citation")) - -def xml_pmc_refactor_pub_date(xmltree): - ... \ No newline at end of file From 16152ecba13a9f38537d7bd4c7f73a925eda99d8 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 9 Aug 2024 15:04:13 -0300 Subject: [PATCH 3/5] Adiciona teste test_xml_pmc_without_original --- tests/sps/formats/test_pmc.py | 71 +++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/tests/sps/formats/test_pmc.py b/tests/sps/formats/test_pmc.py index 2db4c7167..c6e48cd93 100644 --- a/tests/sps/formats/test_pmc.py +++ b/tests/sps/formats/test_pmc.py @@ -492,6 +492,77 @@ def test_xml_pmc_ref(self): self.assertEqual(obtained, expected) + def test_xml_pmc_without_original(self): + self.maxDiff = None + expected = ( + '
' + '' + '' + 'ZwzqmpTpbhTmtwR9GfDzP7c' + 'S0080-62342022000100445' + '10.1590/1980-220X-REEUSP-2021-0569en' + '00445' + '' + '' + '0000-0003-0843-6485' + '' + 'Boni' + 'Fernanda Guarilha' + '' + '' + '1' + '' + '' + '' + '' + 'Universidade Federal do Rio Grande do Sul, Porto Alegre, RS, Brazil' + '' + '' + '' + '
' + + ) + xml_tree = ET.fromstring( + '
' + '' + '' + 'ZwzqmpTpbhTmtwR9GfDzP7c' + 'S0080-62342022000100445' + '10.1590/1980-220X-REEUSP-2021-0569en' + '00445' + '' + '' + '0000-0003-0843-6485' + '' + 'Boni' + 'Fernanda Guarilha' + '' + '' + '1' + '' + '' + '' + '' + 'Universidade Federal do Rio Grande do Sul' + 'Escola de Enfermagem' + 'Programa de Pós-Graduação em Enfermagem' + '' + 'Porto Alegre' + 'RS' + '' + 'Brazil' + '' + '' + '' + '
' + ) + + xml_pmc_aff(xml_tree) + + obtained = ET.tostring(xml_tree, encoding="utf-8").decode("utf-8") + + self.assertEqual(obtained, expected) + if __name__ == '__main__': unittest.main() From 3b70df745c2ea482412cf82a2facbdca0bfae357 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Sun, 11 Aug 2024 21:06:07 -0300 Subject: [PATCH 4/5] Substitui findall por xpath para named-content, state e city --- packtools/sps/formats/pmc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packtools/sps/formats/pmc.py b/packtools/sps/formats/pmc.py index 9391789ee..a7fb4701d 100644 --- a/packtools/sps/formats/pmc.py +++ b/packtools/sps/formats/pmc.py @@ -52,7 +52,7 @@ def xml_pmc_aff(xml_tree): addr_line = aff.find("./addr-line") if addr_line is not None: - named_contents = addr_line.findall(".//named-content") + named_contents = addr_line.xpath(".//named-content | .//state | .//city ") aff_with_address.extend([named_content.text for named_content in named_contents]) country = aff.find("./country") From 1b8a4a673422b7c5b84688393daea6bbff037fd1 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Sun, 11 Aug 2024 21:06:43 -0300 Subject: [PATCH 5/5] Adiciona teste para xml com city e satate --- tests/sps/formats/test_pmc.py | 71 +++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/tests/sps/formats/test_pmc.py b/tests/sps/formats/test_pmc.py index c6e48cd93..8c86b182c 100644 --- a/tests/sps/formats/test_pmc.py +++ b/tests/sps/formats/test_pmc.py @@ -563,6 +563,77 @@ def test_xml_pmc_without_original(self): self.assertEqual(obtained, expected) + def test_xml_pmc_with_state_and_city(self): + self.maxDiff = None + expected = ( + '
' + '' + '' + 'ZwzqmpTpbhTmtwR9GfDzP7c' + 'S0080-62342022000100445' + '10.1590/1980-220X-REEUSP-2021-0569en' + '00445' + '' + '' + '0000-0003-0843-6485' + '' + 'Boni' + 'Fernanda Guarilha' + '' + '' + '1' + '' + '' + '' + '' + 'Fundação Oswaldo Cruz, Manguinhos, RJ, Brasil' + '' + '' + '' + '
' + + ) + xml_tree = ET.fromstring( + '
' + '' + '' + 'ZwzqmpTpbhTmtwR9GfDzP7c' + 'S0080-62342022000100445' + '10.1590/1980-220X-REEUSP-2021-0569en' + '00445' + '' + '' + '0000-0003-0843-6485' + '' + 'Boni' + 'Fernanda Guarilha' + '' + '' + '1' + '' + '' + '' + '' + 'Fundação Oswaldo Cruz' + 'Escola Nacional de Saúde Pública Sérgio Arouca' + 'Centro de Estudos da Saúde do Trabalhador e Ecologia Humana' + '' + 'Manguinhos' + 'RJ' + '' + 'Brasil' + '' + '' + '' + '
' + ) + + xml_pmc_aff(xml_tree) + + obtained = ET.tostring(xml_tree, encoding="utf-8").decode("utf-8") + + self.assertEqual(obtained, expected) + if __name__ == '__main__': unittest.main()