Skip to content

Commit

Permalink
fix transcripts parser (#192)
Browse files Browse the repository at this point in the history
* fix linking to segment 1

Segment 1 has version 0, but we were only checking versions 1 and above.

CPCN-598

* avoid HTML conversion for transcripts

CPCN-607

* revert test change

* avoid any links in transcripts
  • Loading branch information
petrjasek authored Feb 7, 2024
1 parent 6099a29 commit a17bbba
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 16 deletions.
29 changes: 19 additions & 10 deletions server/cp/ingest/parser/cp_transcripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@

from superdesk import get_resource_service
from superdesk.io.feed_parsers.ninjs import NINJSFeedParser
from superdesk.text_utils import plain_text_to_html


def get_previous_version(original_ingest_id: str, version_number: int) -> Optional[Dict[str, Any]]:
while version_number > 0:
def get_previous_version(
original_ingest_id: str, version_number: int
) -> Optional[Dict[str, Any]]:
while version_number >= 0:
ingest_id = f"{original_ingest_id}.{version_number}"
prev_item = get_resource_service("archive").find_one(req=None, ingest_id=ingest_id)
prev_item = get_resource_service("archive").find_one(
req=None, ingest_id=ingest_id
)

if prev_item is not None:
return prev_item
Expand All @@ -27,12 +30,18 @@ def _transform_from_ninjs(self, ninjs: Dict[str, Any]):
ninjs["guid"] = f"{original_guid}.{version}"
item = super()._transform_from_ninjs(ninjs)
item["version"] = version
item["body_html"] = plain_text_to_html(item["body_html"])
item.setdefault("extra", {}).update(dict(
publish_ingest_id_as_guid=True,
cp_version=version,
type="transcript",
))
item["body_html"] = (
item["body_html"]
if item["body_html"].strip().startswith("<p>")
else "<p>{}</p>".format(item["body_html"])
)
item.setdefault("extra", {}).update(
dict(
publish_ingest_id_as_guid=True,
cp_version=version,
type="transcript",
)
)

previous_item = get_previous_version(original_guid, version - 1)
if previous_item is not None:
Expand Down
10 changes: 5 additions & 5 deletions server/tests/ingest/parser/cp_transcripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class CP_Transcripts_ParseTestCase(unittest.TestCase):
def test_parse(self):
with self.app.app_context(), patch.dict(superdesk.resources, resources):
superdesk.resources["archive"].service.find_one.side_effect = [
{"ingest_id": "d3c8487a-1757-4dde-8bb5-22ca166c1e67.1", "version": 2, "extra": {"ap_version": 999}},
{"ingest_id": "d3c8487a-1757-4dde-8bb5-22ca166c1e67.0", "version": 0, "extra": {"ap_version": 999}},
]
items = parser.parse(get_fixture_path("cp_transcripts.json", "cp_transcripts"), provider)
superdesk.resources["archive"].service.find_one.side_effect = None
Expand All @@ -29,8 +29,8 @@ def test_parse(self):
self.assertEqual("text", item["type"])
self.assertEqual("transcript", item["extra"]["type"])
self.assertEqual(True, item["extra"]["publish_ingest_id_as_guid"])
self.assertEqual(2, item["extra"]["cp_version"])
self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.2", item["guid"])
self.assertEqual(2, item["version"])
self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.1", item["rewrite_of"])
self.assertEqual(1, item["extra"]["cp_version"])
self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.1", item["guid"])
self.assertEqual(1, item["version"])
self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.0", item["rewrite_of"])
self.assertTrue(item["body_html"].startswith("<p>laying around"))
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"guid": "d3c8487a-1757-4dde-8bb5-22ca166c1e67",
"version": "2",
"version": "1",
"type": "text",
"located": "Toronto, ON",
"language": "en-CA",
Expand Down

0 comments on commit a17bbba

Please sign in to comment.