From 3e93b588a2ed62514eb7e32e47895a1ec94603a1 Mon Sep 17 00:00:00 2001 From: Petr Jasek Date: Mon, 5 Feb 2024 15:14:09 +0100 Subject: [PATCH 1/4] fix linking to segment 1 it has version 0 and we were only checking 1+ CPCN-598 --- server/cp/ingest/parser/cp_transcripts.py | 2 +- server/tests/ingest/parser/cp_transcripts.py | 10 +++++----- .../parser/fixtures/cp_transcripts/cp_transcripts.json | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/server/cp/ingest/parser/cp_transcripts.py b/server/cp/ingest/parser/cp_transcripts.py index 8a4af0e8..0cc45986 100644 --- a/server/cp/ingest/parser/cp_transcripts.py +++ b/server/cp/ingest/parser/cp_transcripts.py @@ -6,7 +6,7 @@ def get_previous_version(original_ingest_id: str, version_number: int) -> Optional[Dict[str, Any]]: - while version_number > 0: + while version_number >= 0: ingest_id = f"{original_ingest_id}.{version_number}" prev_item = get_resource_service("archive").find_one(req=None, ingest_id=ingest_id) diff --git a/server/tests/ingest/parser/cp_transcripts.py b/server/tests/ingest/parser/cp_transcripts.py index 1b593c95..7a322cdd 100644 --- a/server/tests/ingest/parser/cp_transcripts.py +++ b/server/tests/ingest/parser/cp_transcripts.py @@ -20,7 +20,7 @@ class CP_Transcripts_ParseTestCase(unittest.TestCase): def test_parse(self): with self.app.app_context(), patch.dict(superdesk.resources, resources): superdesk.resources["archive"].service.find_one.side_effect = [ - {"ingest_id": "d3c8487a-1757-4dde-8bb5-22ca166c1e67.1", "version": 2, "extra": {"ap_version": 999}}, + {"ingest_id": "d3c8487a-1757-4dde-8bb5-22ca166c1e67.0", "version": 0, "extra": {"ap_version": 999}}, ] items = parser.parse(get_fixture_path("cp_transcripts.json", "cp_transcripts"), provider) superdesk.resources["archive"].service.find_one.side_effect = None @@ -29,8 +29,8 @@ def test_parse(self): self.assertEqual("text", item["type"]) self.assertEqual("transcript", item["extra"]["type"]) self.assertEqual(True, item["extra"]["publish_ingest_id_as_guid"]) - self.assertEqual(2, item["extra"]["cp_version"]) - self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.2", item["guid"]) - self.assertEqual(2, item["version"]) - self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.1", item["rewrite_of"]) + self.assertEqual(1, item["extra"]["cp_version"]) + self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.1", item["guid"]) + self.assertEqual(1, item["version"]) + self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.0", item["rewrite_of"]) self.assertTrue(item["body_html"].startswith("

laying around")) diff --git a/server/tests/ingest/parser/fixtures/cp_transcripts/cp_transcripts.json b/server/tests/ingest/parser/fixtures/cp_transcripts/cp_transcripts.json index 6acd4258..39c01126 100644 --- a/server/tests/ingest/parser/fixtures/cp_transcripts/cp_transcripts.json +++ b/server/tests/ingest/parser/fixtures/cp_transcripts/cp_transcripts.json @@ -1,6 +1,6 @@ { "guid": "d3c8487a-1757-4dde-8bb5-22ca166c1e67", - "version": "2", + "version": "1", "type": "text", "located": "Toronto, ON", "language": "en-CA", From c5a660f66e724f325409bb7f5737b0dfa25e79d7 Mon Sep 17 00:00:00 2001 From: Petr Jasek Date: Tue, 6 Feb 2024 15:34:57 +0100 Subject: [PATCH 2/4] avoid html conversion for transcripts CPCN-607 --- server/cp/ingest/parser/cp_transcripts.py | 26 ++++++++++++++------ server/tests/ingest/parser/cp_transcripts.py | 1 - 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/server/cp/ingest/parser/cp_transcripts.py b/server/cp/ingest/parser/cp_transcripts.py index 0cc45986..d8ab9e1d 100644 --- a/server/cp/ingest/parser/cp_transcripts.py +++ b/server/cp/ingest/parser/cp_transcripts.py @@ -5,10 +5,14 @@ from superdesk.text_utils import plain_text_to_html -def get_previous_version(original_ingest_id: str, version_number: int) -> Optional[Dict[str, Any]]: +def get_previous_version( + original_ingest_id: str, version_number: int +) -> Optional[Dict[str, Any]]: while version_number >= 0: ingest_id = f"{original_ingest_id}.{version_number}" - prev_item = get_resource_service("archive").find_one(req=None, ingest_id=ingest_id) + prev_item = get_resource_service("archive").find_one( + req=None, ingest_id=ingest_id + ) if prev_item is not None: return prev_item @@ -27,12 +31,18 @@ def _transform_from_ninjs(self, ninjs: Dict[str, Any]): ninjs["guid"] = f"{original_guid}.{version}" item = super()._transform_from_ninjs(ninjs) item["version"] = version - item["body_html"] = plain_text_to_html(item["body_html"]) - item.setdefault("extra", {}).update(dict( - publish_ingest_id_as_guid=True, - cp_version=version, - type="transcript", - )) + item["body_html"] = ( + item["body_html"] + if item["body_html"].strip().startswith("

") + else plain_text_to_html(item["body_html"]) + ) + item.setdefault("extra", {}).update( + dict( + publish_ingest_id_as_guid=True, + cp_version=version, + type="transcript", + ) + ) previous_item = get_previous_version(original_guid, version - 1) if previous_item is not None: diff --git a/server/tests/ingest/parser/cp_transcripts.py b/server/tests/ingest/parser/cp_transcripts.py index 7a322cdd..32bdbd51 100644 --- a/server/tests/ingest/parser/cp_transcripts.py +++ b/server/tests/ingest/parser/cp_transcripts.py @@ -33,4 +33,3 @@ def test_parse(self): self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.1", item["guid"]) self.assertEqual(1, item["version"]) self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.0", item["rewrite_of"]) - self.assertTrue(item["body_html"].startswith("

laying around")) From c629d4bb9305f3d1be4f478b62802eea1a87b298 Mon Sep 17 00:00:00 2001 From: Petr Jasek Date: Tue, 6 Feb 2024 17:38:06 +0100 Subject: [PATCH 3/4] revert test change --- server/tests/ingest/parser/cp_transcripts.py | 1 + 1 file changed, 1 insertion(+) diff --git a/server/tests/ingest/parser/cp_transcripts.py b/server/tests/ingest/parser/cp_transcripts.py index 32bdbd51..7a322cdd 100644 --- a/server/tests/ingest/parser/cp_transcripts.py +++ b/server/tests/ingest/parser/cp_transcripts.py @@ -33,3 +33,4 @@ def test_parse(self): self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.1", item["guid"]) self.assertEqual(1, item["version"]) self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.0", item["rewrite_of"]) + self.assertTrue(item["body_html"].startswith("

laying around")) From 0a8b46c767b415ee8eb88d532a1505ee697f9cf6 Mon Sep 17 00:00:00 2001 From: Petr Jasek Date: Wed, 7 Feb 2024 09:23:43 +0100 Subject: [PATCH 4/4] avoid any links in transcripts --- server/cp/ingest/parser/cp_transcripts.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/cp/ingest/parser/cp_transcripts.py b/server/cp/ingest/parser/cp_transcripts.py index d8ab9e1d..c5b73a50 100644 --- a/server/cp/ingest/parser/cp_transcripts.py +++ b/server/cp/ingest/parser/cp_transcripts.py @@ -2,7 +2,6 @@ from superdesk import get_resource_service from superdesk.io.feed_parsers.ninjs import NINJSFeedParser -from superdesk.text_utils import plain_text_to_html def get_previous_version( @@ -34,7 +33,7 @@ def _transform_from_ninjs(self, ninjs: Dict[str, Any]): item["body_html"] = ( item["body_html"] if item["body_html"].strip().startswith("

") - else plain_text_to_html(item["body_html"]) + else "

{}

".format(item["body_html"]) ) item.setdefault("extra", {}).update( dict(