From 575ebb6cf108da5da48fd9a9b87860805f6d38bd Mon Sep 17 00:00:00 2001 From: MarkLark86 Date: Wed, 1 Nov 2023 09:34:48 +1100 Subject: [PATCH] [CPCN-27] CP Transcripts ingest parser (#187) --- server/cp/ingest/__init__.py | 2 + server/cp/ingest/parser/cp_transcripts.py | 40 ++++++++++++++++ server/tests/ingest/parser/cp_transcripts.py | 36 ++++++++++++++ .../fixtures/cp_onclusive/cp_onclusive.json | 2 + .../cp_transcripts/cp_transcripts.json | 48 +++++++++++++++++++ 5 files changed, 128 insertions(+) create mode 100644 server/cp/ingest/parser/cp_transcripts.py create mode 100644 server/tests/ingest/parser/cp_transcripts.py create mode 100644 server/tests/ingest/parser/fixtures/cp_transcripts/cp_transcripts.json diff --git a/server/cp/ingest/__init__.py b/server/cp/ingest/__init__.py index c7a016f7..8895c262 100644 --- a/server/cp/ingest/__init__.py +++ b/server/cp/ingest/__init__.py @@ -4,12 +4,14 @@ from .parser.businesswire import BusinessWireParser from .parser.globenewswire import GlobeNewswireParser from .parser.cp_onclusive import CPOnclusiveFeedParser +from .parser.cp_transcripts import CPTranscriptsFeedParser def init_app(app): # register new parsers register_feed_parser(BusinessWireParser.NAME, BusinessWireParser()) register_feed_parser(GlobeNewswireParser.NAME, GlobeNewswireParser()) + register_feed_parser(CPTranscriptsFeedParser.NAME, CPTranscriptsFeedParser()) # override core parsers registered_feed_parsers[CP_APMediaFeedParser.NAME] = CP_APMediaFeedParser() diff --git a/server/cp/ingest/parser/cp_transcripts.py b/server/cp/ingest/parser/cp_transcripts.py new file mode 100644 index 00000000..8a4af0e8 --- /dev/null +++ b/server/cp/ingest/parser/cp_transcripts.py @@ -0,0 +1,40 @@ +from typing import Dict, Any, Optional + +from superdesk import get_resource_service +from superdesk.io.feed_parsers.ninjs import NINJSFeedParser +from superdesk.text_utils import plain_text_to_html + + +def get_previous_version(original_ingest_id: str, version_number: int) -> 
Optional[Dict[str, Any]]: + while version_number > 0: + ingest_id = f"{original_ingest_id}.{version_number}" + prev_item = get_resource_service("archive").find_one(req=None, ingest_id=ingest_id) + + if prev_item is not None: + return prev_item + version_number -= 1 + + return None + + +class CPTranscriptsFeedParser(NINJSFeedParser): + NAME = "cp_transcripts" + label = "CP Transcripts" + + def _transform_from_ninjs(self, ninjs: Dict[str, Any]): + original_guid = ninjs["guid"] + version = int(ninjs["version"]) + ninjs["guid"] = f"{original_guid}.{version}" + item = super()._transform_from_ninjs(ninjs) + item["version"] = version + item["body_html"] = plain_text_to_html(item["body_html"]) + item.setdefault("extra", {}).update(dict( + publish_ingest_id_as_guid=True, + cp_version=version, + type="transcript", + )) + + previous_item = get_previous_version(original_guid, version - 1) + if previous_item is not None: + item["rewrite_of"] = previous_item["ingest_id"] + return item diff --git a/server/tests/ingest/parser/cp_transcripts.py b/server/tests/ingest/parser/cp_transcripts.py new file mode 100644 index 00000000..1b593c95 --- /dev/null +++ b/server/tests/ingest/parser/cp_transcripts.py @@ -0,0 +1,36 @@ +import unittest +from unittest.mock import patch + +import flask +import superdesk + +from cp.ingest import CPTranscriptsFeedParser + +from tests.ingest.parser import get_fixture_path +from tests.mock import resources + + +provider = {} +parser = CPTranscriptsFeedParser() + + +class CP_Transcripts_ParseTestCase(unittest.TestCase): + app = flask.Flask(__name__) + + def test_parse(self): + with self.app.app_context(), patch.dict(superdesk.resources, resources): + superdesk.resources["archive"].service.find_one.side_effect = [ + {"ingest_id": "d3c8487a-1757-4dde-8bb5-22ca166c1e67.1", "version": 2, "extra": {"ap_version": 999}}, + ] + items = parser.parse(get_fixture_path("cp_transcripts.json", "cp_transcripts"), provider) + 
superdesk.resources["archive"].service.find_one.side_effect = None + + item = items[0] + self.assertEqual("text", item["type"]) + self.assertEqual("transcript", item["extra"]["type"]) + self.assertEqual(True, item["extra"]["publish_ingest_id_as_guid"]) + self.assertEqual(2, item["extra"]["cp_version"]) + self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.2", item["guid"]) + self.assertEqual(2, item["version"]) + self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.1", item["rewrite_of"]) + self.assertTrue(item["body_html"].startswith("<p>laying around")) diff --git a/server/tests/ingest/parser/fixtures/cp_onclusive/cp_onclusive.json b/server/tests/ingest/parser/fixtures/cp_onclusive/cp_onclusive.json index 1d0fc48e..ffa165f3 100644 --- a/server/tests/ingest/parser/fixtures/cp_onclusive/cp_onclusive.json +++ b/server/tests/ingest/parser/fixtures/cp_onclusive/cp_onclusive.json @@ -47,7 +47,9 @@ 4708 ], "createdDate": "2021-05-04T21:19:10.2", + "createdDateUtc": "2021-05-04T19:19:10.2", "lastEditDate": "2022-05-10T13:14:34.873", + "lastEditDateUtc": "2022-05-10T11:14:34.873", "deleted": false, "deletionDate": null, "website": "https://www.canadianinstitute.com/anti-money-laundering-financial-crime/", diff --git a/server/tests/ingest/parser/fixtures/cp_transcripts/cp_transcripts.json b/server/tests/ingest/parser/fixtures/cp_transcripts/cp_transcripts.json new file mode 100644 index 00000000..6acd4258 --- /dev/null +++ b/server/tests/ingest/parser/fixtures/cp_transcripts/cp_transcripts.json @@ -0,0 +1,48 @@ +{ + "guid": "d3c8487a-1757-4dde-8bb5-22ca166c1e67", + "version": "2", + "type": "text", + "located": "Toronto, ON", + "language": "en-CA", + "headline": "History TV - Rust Valley Restorers, 7/6/2023 2:00:00 PM UTC - Segment #2", + "urgency": 3, + "pubstatus": "usable", + "body_html": "laying around \nmy mom's property. \nSome sheep \nmight wander in here \nand blow itself up. \nLet's see, is this \ngonna work, Ave? \nIt's a pretty \nheavy barrel. \nPerfect! \n[Narrator]: \nUsing an old rusty barrel \nand an empty ice cream pail... \nBring it over here, Shaf, \nwe're gonna stir it up. \n[Narrator]: Mike and Shafin \nprepare the tannerite \nfor target practice. \nOkay, now, \nwe should probably \njust add one of these \nat a time, eh? \n[Mike]: It's not really \nclassified as an explosive, \nbut when you hit it \nwith a bullet, \nit does explode, \nor it makes enough gas \nthat it's almost an explosive, \nbut whatever, \nit can hurt you bad. \nWatching at home, \nchildren, \ndo not do this. 
\n[Mike]: It's dangerous stuff \nin the hands \nof the wrong person, \nlike Shafin. \nShe's a-gonna \ngo boom. \n[cackling] \nOkay. \nGood? \nGuys, stay behind. \n[shoots] \n[laughing wildly] \nYou missed! \nYour turn. \nOkay, \nlet's do it! \n[shoots] \nOkay, Ave, \nyour turn! \nIf you aim for the white, \nyou'll hit it. \n[shoots] \n[bleep] \n[Mike]: Sure you don't wanna \ntake a crack, Helen? \nOh, I could probably try. \n[Avery]: Give a poke \nat 'er there, Grandma. \nPut the bullet in that. \nOkay. \n[shoots] \nWhoo-hoo-hoo! \nHoly [bleep]! \n[Shafin laughing] Oh! \nDid you see that piece \ngo flyin'? \n[laughing in delight] \n[Avery]: I'm fairly impressed. \nMy mother, \nafter all these years, \nshe's still \nan incredibly good shot. \nShe's one tough lady. \nWhoo-hoo! Whoo! \n[Mike]: \nLet's go check it out. \nI think that worked quite well. \nThat stuff is potent. \nHoly guacamole! \nMan! \nHere's my mom. \nDead shot, she is, eh? \nYeah. \nShe blew 'er apart \npretty damn good, man. \n[laughing] \nLook, Ave. Watch. \n[Narrator]: Mike and Avery \nhave one more thing to do \nbefore they hit the road... \n[Mike]: I've noticed \nthe structural defects \nof this building. \nVoilà! \n[Narrator]: ...removing \nthe fragile Model T \nfrom the shed \nwithout damaging it. \nBack up the trailer, \ncome along, and on. \nYou just wrecked \nmy family heritage. \n-Well, we'll fix it. \n-[cackling] \nOkay, line me up \nthere, big feller. \nHow much further? \nGood enough. \nWe got one Mike-power \non the winch here. \nShouldn't be a problem. \nWell, there ya go. \nI think we gotta bounce it \nover an inch, Ave. \n[Mike]: It's very important \nto be careful \ngetting this thing out. \nThe thing's basically \nnine-tenths \nof a hundred years old. \nJust keep going! \nWe're aren't gonna \nhurt that fender? \nNo! \n[Mike]: We don't want \nto scratch it, \nwe don't wanna dent it. \nWe just wanna get it back \nto where we can do \nsomething with it. 
\nThat's looking \npretty close. \nIt is. \nIt's jammed on the fender, \nand it's jammed on... \nKeep going. \nI don't wanna \nwreck it, Ave. \nAve, we're bending \nthe window! \nNo, we're not. \nKeep going! \nIsn't the fender rubbing? \n-Keep pulling! \n-Okay! \n-[crunch] \n-What was that? \nOh, the steering \njust broke on 'er. \nWhat? Really? \nWhat did you \njust-- Avery. \nI don't know, \nsomething just happened \nin the steering box. \nYou're not capable \nof being gentle. \n[Mike]: \nAvery's always in a hurry. \nI don't know what for, \nbut why rush to cause more work? \nHere. \nHang on. \nOh, it's stuck-- \nHang on. \nThere! \nWe didn't do \ntoo much damage \nby the looks of it, Ave. \nWell, the front fender here \ngot a little warble in it. \n[Mike]: I mean, Helen's \nowned that Model T \n25 years. \nI know it's her pride and joy. \nIt'd be kinda cool \nto get it up and running for... \nfor the old girl, \nand kinda surprise her. \nI mean, in the light of day, \nit doesn't look \nthat bad, Ave. \nWhat more can we \nexpect from something \nthat's 90 years old? \nThere's two gerbils \nliving in the radiator. \n[chuckles] \n[Mike]: Well, it's been \na pleasure, Helen. \nThank you \nfor everything, \nand it was so nice \nto visit you. \nAnd to meet the kids. \nAnd to meet \nthe kids. \nIt was good to have \nMike and Avery around, \nand got a couple things \ngoing, and... \nthe lambs, \nand what have you, \nand have a few good \nold-fashioned arguments again! \n[laughs] \nHere, why don't \nI hold him \nwhile you have \na tearful goodbye \nwith your \nfavourite child? \n[laughing] \nMmm... \nIt was good \nseein' ya. \nHave a good trip home. \nYeah, you think about, \none of these days, \ngetting your affairs \ntogether, maybe. \nYou know, my mother's \nworked hard her whole life. \nShe needs to enjoy \nthe quality of life \nthat she deserves, \nsell some of her stuff, \nand, uh, lead a normal life... \nWell, I can't go \ntill these guys \nare big enough to ship. 
\n[Avery]: ...but I guess, to her, \nthis is a normal life. \nYou know, it's kind of \nlike Mike. \nShe comes out here, and goes, \n\"Oh, look at all my treasures!\" \nSo, what do you do? \nShe's happy. \nYou don't want \nto take that home \nfor a shop animal? \nNo. Okay, well, we got \na thousand K to go there, Ave. \nLet's make a mile. \nThanks again, Helen. \n-See ya! \nOh, thank you. \n-Okay. \nYou be good. \nOkay. \nBunch of prickly pears! \n[laughing] \nSee ya, Ave! \n[honking] \nBye! \nSee ya! \nGoodbye to the boys. \nDon't screw up the car! \n[Helen laughing] \nAt Nat Geo, the shark obsession \nruns deep. \n[Cheering] \nSo we had to make shark fest \nbigger than ever. \nWe're gonna dive in and take a \nlook. \n-Woah! \n-This is a shark fest! \n", + "slugline": "Rust-Valley-Restorers", + "firstcreated": "2023-07-06T14:00:01+0000", + "firstpublished": "2023-07-06T14:01:01+0000", + "source": "TV Eyes", + "extra": { + "headline_extended": "A visit to Avery's family farm leads to the discovery of unexpected treasures and puts Mike's veterinary skills to the test. Meanwhile, a new member of the Rust Valley rat rod community needs help rebuilding his dream." + }, + "profile": "Story", + "priority": 5, + "subject": [ + { + "code": "lifestyle", + "name": "Lifestyle", + "scheme": "tveyes.com" + }, + { + "code": "20000565", + "name": "lifestyle", + "scheme": "subject_custom" + } + ], + "service": [ + { + "code": "g", + "name": "National" + } + ], + "charcount": 874, + "wordcount": 151, + "readtime": 1, + "products": [ + { + "code": "5f3d90fd77eb2ec9ce5c2a2d", + "name": "Stories" + } + ], + "uri": "https://cms.cp.org/contentapi/items/3764e559-835f-4a0a-bdcd-8c32ef75b75f" +} \ No newline at end of file