From 7e0ad6bb4c0098970fcf1b7c3b2f4c5e51cbc7e1 Mon Sep 17 00:00:00 2001 From: Ketan <73937490+devketanpro@users.noreply.github.com> Date: Fri, 13 Dec 2024 09:46:05 +0530 Subject: [PATCH] fix the Belga dpa parser to take better care of the ed note [SDBELGA-910] (#661) * fix the Belga dpa parser to take better care of the ed note [SDBELGA-910] * create a helper func * fix black SDBELGA-910 * refactor code via black * update logic for extracting ednote * address comment * remove unwanted code * remove unused func --- .../io/feed_parsers/belga_dpa_newsml_2_0.py | 10 +- .../format_news_events_tommorow.py | 8 +- server/belga/publish/belga_newsml_1_2.py | 8 +- server/belga/search_providers.py | 6 +- .../feed_parsers/belga_dpa_newsml_2_0_test.py | 25 +-- server/tests/io/fixtures/3FB1C600A1AC5567.xml | 171 ++++++++++++++++++ 6 files changed, 197 insertions(+), 31 deletions(-) create mode 100644 server/tests/io/fixtures/3FB1C600A1AC5567.xml diff --git a/server/belga/io/feed_parsers/belga_dpa_newsml_2_0.py b/server/belga/io/feed_parsers/belga_dpa_newsml_2_0.py index 1304be12..d5b76e3a 100644 --- a/server/belga/io/feed_parsers/belga_dpa_newsml_2_0.py +++ b/server/belga/io/feed_parsers/belga_dpa_newsml_2_0.py @@ -174,10 +174,14 @@ def parse_inline_content(self, tree, item): def parse_item_meta(self, tree, item): super().parse_item_meta(tree, item) + meta = tree.find(self.qname("itemMeta")) - edNote = meta.find(self.qname("edNote")) - text = ElementTree.tostring(edNote, encoding="utf-8", method="text") - item["ednote"] = text.decode("utf-8").replace(" \n", "").replace(" ", "") + item["ednote"] = "\n".join( + edNote.text.strip() + for edNote in meta.findall(self.qname("edNote")) + if "dpaednoterole:correctionshort" == edNote.attrib.get("role", "") + and edNote.text + ) def parse_content_meta(self, tree, item): meta = super().parse_content_meta(tree, item) diff --git a/server/belga/planning_exports/format_news_events_tommorow.py 
b/server/belga/planning_exports/format_news_events_tommorow.py index 58875b67..10146a79 100644 --- a/server/belga/planning_exports/format_news_events_tommorow.py +++ b/server/belga/planning_exports/format_news_events_tommorow.py @@ -37,9 +37,11 @@ def format_event_for_tommorow( # Format event details formatted_event = { "subject": ",".join(get_subjects(event, "fr")), - "calendars": event["calendars"][0]["qcode"].capitalize() - if event.get("calendars") - else "", + "calendars": ( + event["calendars"][0]["qcode"].capitalize() + if event.get("calendars") + else "" + ), "contacts": get_formatted_contacts(event), "coverages": get_coverages(event, locale), "location": get_item_location(event, locale), diff --git a/server/belga/publish/belga_newsml_1_2.py b/server/belga/publish/belga_newsml_1_2.py index 8a6de9c9..94fd4ad0 100644 --- a/server/belga/publish/belga_newsml_1_2.py +++ b/server/belga/publish/belga_newsml_1_2.py @@ -355,9 +355,11 @@ def _format_text(self, newscomponent_1_level, item): newscomponent_2_level, "Role", { - "FormalName": item["_role"] - if item.get("profile") in self.SD_CP_NAME_ROLE_MAP - else item.get("_role").split(" -")[0].title() + "FormalName": ( + item["_role"] + if item.get("profile") in self.SD_CP_NAME_ROLE_MAP + else item.get("_role").split(" -")[0].title() + ) }, ) # NewsLines diff --git a/server/belga/search_providers.py b/server/belga/search_providers.py index 02d70db5..0e8d7f2d 100644 --- a/server/belga/search_providers.py +++ b/server/belga/search_providers.py @@ -542,9 +542,9 @@ def format_list_item(self, data): "sign_off": self.get_sign_off(data.get("authors")), "authors": self.get_authors(data.get("authors")), "subject": self.get_subjects(data), - "renditions": self.get_renditions(data) - if data.get("assetType") == "Picture" - else {}, + "renditions": ( + self.get_renditions(data) if data.get("assetType") == "Picture" else {} + ), # SDBELGA-665 "ednote": get_text(data.get("editorialInfo")), } diff --git 
a/server/tests/io/feed_parsers/belga_dpa_newsml_2_0_test.py b/server/tests/io/feed_parsers/belga_dpa_newsml_2_0_test.py index b1e555b0..b8144c4e 100644 --- a/server/tests/io/feed_parsers/belga_dpa_newsml_2_0_test.py +++ b/server/tests/io/feed_parsers/belga_dpa_newsml_2_0_test.py @@ -43,25 +43,6 @@ def test_content(self): self.assertEqual(item["type"], "text") self.assertEqual(str(item["versioncreated"]), "2019-06-03 13:00:01+00:00") self.assertEqual(item["pubstatus"], "usable") - expected_ednote = ( - "\nNotizblock" - "\nRedaktionelle Hinweise" - "\n Migranten sind nach Definition der Internationalen Organisation ‎für Migration (IOM) alle Menschen," - " die ihren Wohnort verlassen – egal ‎aus welchen Gründen, wie lange oder ob freiwillig oder " - "‎unfreiwillig. Flüchtlinge dagegen suchen Schutz vor Krieg oder vor ‎drohender Verfolgung, etwa " - "wegen ihrer Religion, Nationalität oder ‎ihrer politischen Überzeugung. Damit sind Flüchtlinge " - "auch Migranten‎, aber nicht alle Migranten Flüchtlinge." 
- "\n Internet" - "\nKüstenwache Mitteilungen" - "\nOrte" - "\n [Alexandroupolis](Alexandroupolis 681 00, Griechenland)" - "\n[Kleininsel Agathonisi](Agathonisi, Griechenland)" - "\n Die folgenden Informationen sind nicht zur Veröffentlichung bestimmt" - "\nKontakte" - "\n Autor: Takis Tsafos (Athen), +30 6944 33 24 77, " - "\ndpa tt xx n1\n" - ) - self.assertEqual(item["ednote"], expected_ednote) self.assertEqual(item["urgency"], 3) self.assertEqual( item["headline"], @@ -184,3 +165,9 @@ def test_new_mappings(self): expected_subject.sort(key=lambda i: i["name"]) self.assertEqual(item["extra"], {"city": "Berlin", "country": "Germany"}) self.assertEqual(item["genre"], [{"name": "EXTRA"}]) + + def test_edNote_content(self): + filename = "3FB1C600A1AC5567.xml" + self._initialize_parser(filename) + item = self.item[0] + self.assertEqual(item["ednote"], "updated with a photo") diff --git a/server/tests/io/fixtures/3FB1C600A1AC5567.xml b/server/tests/io/fixtures/3FB1C600A1AC5567.xml new file mode 100644 index 00000000..0cf40e6d --- /dev/null +++ b/server/tests/io/fixtures/3FB1C600A1AC5567.xml @@ -0,0 +1,171 @@ + + +
+ 2024-07-03T13:14:00+02:00 + 3 + ines +
+ + + + + + + Deutsche Presse-Agentur GmbH + dpa - Deutsche Presse Agentur GmbH / Hamburg Trade Register, HRB 68431 + + (c) 2024 dpa Deutsche Presse Agentur GmbH + Use only with the written agreement with dpa + + + + Text Item(s) + + + Deutsche Presse-Agentur GmbH + + 2024-07-03T13:13:54+02:00 + + Usable + + service-ines + dpa-G2 + + Englischer Dienst multimedial + + + Englischer Dienst + + dpa cis aha wjh + Reporting by: Ciarán Sunderland and Ansgar Haase in Brussels + Editing by: Bill Heaney, +49 30 2852 31472, <international@dpa.com> + dpa photos + updated with a photo + +
+
+

Notebook

+
+
+

The following information is not intended for publication

+

Editorial contacts

+
    +
  • Reporting by: Ciarán Sunderland and Ansgar Haase in Brussels
  • +
  • Editing by: Bill Heaney, +49 30 2852 31472, <international@dpa.com>
  • +
+
+

dpa cis aha wjh

+
+
+ + Update + + + + Picture Item(s) + + NATO allies nix multi-year Ukraine aid plan + +
+ + 3 + + Brussels + + + sunderland.ciaran + + + sunderland.ciaran + + + heaney.william + + + qassem.shorook + + eca:0048:3:i:147:dpa:0772::20240703131400MESZ + + DEVELOPING + + + Ukraine + + + + + Europe + + + Europe + + + + Russia + + + + + Europe + + + Europe + + + + conflict + + conflict, war and peace + + + NATO + + i + + + NATO allies nix multi-year Ukraine aid plan + Brussels (dpa) - + dpa + + + + non public + + + + + + + + + + + NATO allies nix multi-year Ukraine aid plan + + +
+ +
    +
  • conflict
  • +
  • Ukraine
  • +
  • Russia
  • +
  • NATO
  • +
+

DEVELOPING

+

NATO allies nix multi-year Ukraine aid plan

+
+
+

+ NATO Secretary General Jens Stoltenberg has failed to get allies to commit to a multi-year financial pledge to support Ukraine, according to information obtained by dpa on Wednesday.

+

Ahead of a NATO leaders' summit in Washington, allies would only commit to support for Ukraine worth €40 billion ($43 billion) within the next year, dpa learnt from delegations to the alliance.

+

Allies also did not reach an agreement on sharing the financial costs of supporting Ukraine, with NATO members vaguely stating that the gross domestic product (GDP) of a country's economy should play a role.

+

At a meeting of NATO defence ministers in June, Stoltenberg called on allies to agree a plan to maintain over the long term their current level of support to Ukraine, which he put at €40 billion per year.

+

The burden would be divided up according to NATO countries' GDP, with the United States contributing 50%.

+
+ + +
+
+
+
+