Skip to content

Commit

Permalink
[CPCN-27] CP Transcripts ingest parser (#187)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarkLark86 authored Oct 31, 2023
1 parent 5dab4bd commit 575ebb6
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 0 deletions.
2 changes: 2 additions & 0 deletions server/cp/ingest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
from .parser.businesswire import BusinessWireParser
from .parser.globenewswire import GlobeNewswireParser
from .parser.cp_onclusive import CPOnclusiveFeedParser
from .parser.cp_transcripts import CPTranscriptsFeedParser


def init_app(app):
# register new parsers
register_feed_parser(BusinessWireParser.NAME, BusinessWireParser())
register_feed_parser(GlobeNewswireParser.NAME, GlobeNewswireParser())
register_feed_parser(CPTranscriptsFeedParser.NAME, CPTranscriptsFeedParser())

# override core parsers
registered_feed_parsers[CP_APMediaFeedParser.NAME] = CP_APMediaFeedParser()
Expand Down
40 changes: 40 additions & 0 deletions server/cp/ingest/parser/cp_transcripts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from typing import Dict, Any, Optional

from superdesk import get_resource_service
from superdesk.io.feed_parsers.ninjs import NINJSFeedParser
from superdesk.text_utils import plain_text_to_html


def get_previous_version(original_ingest_id: str, version_number: int) -> Optional[Dict[str, Any]]:
    """Find the closest earlier version of an item in the archive.

    Candidate ingest IDs have the form ``"<original_ingest_id>.<n>"``. They are
    looked up through the ``archive`` resource service, with ``n`` counting
    down from ``version_number`` to 1, and the first hit wins.

    :param original_ingest_id: ingest ID without the trailing version suffix
    :param version_number: the highest version number to try
    :return: the first matching archive item, or ``None`` if no candidate is
        found (which is always the case when ``version_number`` is below 1)
    """
    for candidate_version in range(version_number, 0, -1):
        candidate_id = f"{original_ingest_id}.{candidate_version}"
        match = get_resource_service("archive").find_one(req=None, ingest_id=candidate_id)
        if match is not None:
            return match

    return None


class CPTranscriptsFeedParser(NINJSFeedParser):
    """NINJS-based feed parser for CP Transcripts items.

    Each incoming version is ingested under its own guid
    (``"<guid>.<version>"``) and linked to the closest earlier version found
    in the archive through ``rewrite_of``.
    """

    NAME = "cp_transcripts"
    label = "CP Transcripts"

    def _transform_from_ninjs(self, ninjs: Dict[str, Any]):
        """Convert one NINJS transcript into an ingest item.

        :param ninjs: the NINJS payload; must contain ``guid`` and a
            ``version`` convertible to ``int``. Its ``guid`` entry is
            overwritten in place with the versioned guid.
        :return: the item produced by the parent parser, extended with
            ``version``, an HTML ``body_html`` (when a body is present),
            transcript markers under ``extra`` and, when an earlier version
            exists in the archive, ``rewrite_of``.
        :raises KeyError: if ``guid`` or ``version`` is missing from ``ninjs``
        :raises ValueError: if ``version`` cannot be converted to ``int``
        """
        original_guid = ninjs["guid"]
        version = int(ninjs["version"])

        # Give every version a distinct guid before the parent parser runs, so
        # the item it builds already carries the versioned identifier.
        ninjs["guid"] = f"{original_guid}.{version}"
        item = super()._transform_from_ninjs(ninjs)
        item["version"] = version

        # The body arrives as plain text. Convert it only when the parent
        # actually produced one, so a body-less item does not raise KeyError.
        # NOTE(review): assumes the parent may omit ``body_html`` - confirm.
        body = item.get("body_html")
        if body is not None:
            item["body_html"] = plain_text_to_html(body)

        item.setdefault("extra", {}).update(
            {
                "publish_ingest_id_as_guid": True,
                "cp_version": version,
                "type": "transcript",
            }
        )

        # Link to the nearest earlier version already in the archive, if any.
        previous_item = get_previous_version(original_guid, version - 1)
        if previous_item is not None:
            item["rewrite_of"] = previous_item["ingest_id"]
        return item
36 changes: 36 additions & 0 deletions server/tests/ingest/parser/cp_transcripts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import unittest
from unittest.mock import patch

import flask
import superdesk

from cp.ingest import CPTranscriptsFeedParser

from tests.ingest.parser import get_fixture_path
from tests.mock import resources


# Empty provider configuration handed to ``parser.parse`` in the test below.
provider = {}
# Parser instance under test, created once at module level.
parser = CPTranscriptsFeedParser()


class CP_Transcripts_ParseTestCase(unittest.TestCase):
    """Checks that the CP Transcripts parser converts the NINJS fixture correctly."""

    app = flask.Flask(__name__)

    def test_parse(self):
        """Parse the version-2 fixture while the archive reports a version-1 item."""
        with self.app.app_context(), patch.dict(superdesk.resources, resources):
            # The first archive lookup returns the previous version, which the
            # parser should reference through ``rewrite_of``.
            superdesk.resources["archive"].service.find_one.side_effect = [
                {"ingest_id": "d3c8487a-1757-4dde-8bb5-22ca166c1e67.1", "version": 2, "extra": {"ap_version": 999}},
            ]
            try:
                items = parser.parse(get_fixture_path("cp_transcripts.json", "cp_transcripts"), provider)
            finally:
                # Always clear the side effect so a failure here cannot leak
                # the canned response into other tests using the same mock.
                superdesk.resources["archive"].service.find_one.side_effect = None

            item = items[0]
            self.assertEqual("text", item["type"])
            self.assertEqual("transcript", item["extra"]["type"])
            self.assertEqual(True, item["extra"]["publish_ingest_id_as_guid"])
            self.assertEqual(2, item["extra"]["cp_version"])
            self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.2", item["guid"])
            self.assertEqual(2, item["version"])
            self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.1", item["rewrite_of"])
            self.assertTrue(item["body_html"].startswith("<p>laying around"))
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@
4708
],
"createdDate": "2021-05-04T21:19:10.2",
"createdDateUtc": "2021-05-04T19:19:10.2",
"lastEditDate": "2022-05-10T13:14:34.873",
"lastEditDateUtc": "2022-05-10T11:14:34.873",
"deleted": false,
"deletionDate": null,
"website": "https://www.canadianinstitute.com/anti-money-laundering-financial-crime/",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"guid": "d3c8487a-1757-4dde-8bb5-22ca166c1e67",
"version": "2",
"type": "text",
"located": "Toronto, ON",
"language": "en-CA",
"headline": "History TV - Rust Valley Restorers, 7/6/2023 2:00:00 PM UTC - Segment #2",
"urgency": 3,
"pubstatus": "usable",
"body_html": "laying around \nmy mom's property. \nSome sheep \nmight wander in here \nand blow itself up. \nLet's see, is this \ngonna work, Ave? \nIt's a pretty \nheavy barrel. \nPerfect! \n[Narrator]: \nUsing an old rusty barrel \nand an empty ice cream pail... \nBring it over here, Shaf, \nwe're gonna stir it up. \n[Narrator]: Mike and Shafin \nprepare the tannerite \nfor target practice. \nOkay, now, \nwe should probably \njust add one of these \nat a time, eh? \n[Mike]: It's not really \nclassified as an explosive, \nbut when you hit it \nwith a bullet, \nit does explode, \nor it makes enough gas \nthat it's almost an explosive, \nbut whatever, \nit can hurt you bad. \nWatching at home, \nchildren, \ndo not do this. \n[Mike]: It's dangerous stuff \nin the hands \nof the wrong person, \nlike Shafin. \nShe's a-gonna \ngo boom. \n[cackling] \nOkay. \nGood? \nGuys, stay behind. \n[shoots] \n[laughing wildly] \nYou missed! \nYour turn. \nOkay, \nlet's do it! \n[shoots] \nOkay, Ave, \nyour turn! \nIf you aim for the white, \nyou'll hit it. \n[shoots] \n[bleep] \n[Mike]: Sure you don't wanna \ntake a crack, Helen? \nOh, I could probably try. \n[Avery]: Give a poke \nat 'er there, Grandma. \nPut the bullet in that. \nOkay. \n[shoots] \nWhoo-hoo-hoo! \nHoly [bleep]! \n[Shafin laughing] Oh! \nDid you see that piece \ngo flyin'? \n[laughing in delight] \n[Avery]: I'm fairly impressed. \nMy mother, \nafter all these years, \nshe's still \nan incredibly good shot. \nShe's one tough lady. \nWhoo-hoo! Whoo! \n[Mike]: \nLet's go check it out. \nI think that worked quite well. \nThat stuff is potent. \nHoly guacamole! \nMan! \nHere's my mom. \nDead shot, she is, eh? \nYeah. \nShe blew 'er apart \npretty damn good, man. \n[laughing] \nLook, Ave. Watch. \n[Narrator]: Mike and Avery \nhave one more thing to do \nbefore they hit the road... \n[Mike]: I've noticed \nthe structural defects \nof this building. \nVoil&#224;! 
\n[Narrator]: ...removing \nthe fragile Model T \nfrom the shed \nwithout damaging it. \nBack up the trailer, \ncome along, and on. \nYou just wrecked \nmy family heritage. \n-Well, we'll fix it. \n-[cackling] \nOkay, line me up \nthere, big feller. \nHow much further? \nGood enough. \nWe got one Mike-power \non the winch here. \nShouldn't be a problem. \nWell, there ya go. \nI think we gotta bounce it \nover an inch, Ave. \n[Mike]: It's very important \nto be careful \ngetting this thing out. \nThe thing's basically \nnine-tenths \nof a hundred years old. \nJust keep going! \nWe're aren't gonna \nhurt that fender? \nNo! \n[Mike]: We don't want \nto scratch it, \nwe don't wanna dent it. \nWe just wanna get it back \nto where we can do \nsomething with it. \nThat's looking \npretty close. \nIt is. \nIt's jammed on the fender, \nand it's jammed on... \nKeep going. \nI don't wanna \nwreck it, Ave. \nAve, we're bending \nthe window! \nNo, we're not. \nKeep going! \nIsn't the fender rubbing? \n-Keep pulling! \n-Okay! \n-[crunch] \n-What was that? \nOh, the steering \njust broke on 'er. \nWhat? Really? \nWhat did you \njust-- Avery. \nI don't know, \nsomething just happened \nin the steering box. \nYou're not capable \nof being gentle. \n[Mike]: \nAvery's always in a hurry. \nI don't know what for, \nbut why rush to cause more work? \nHere. \nHang on. \nOh, it's stuck-- \nHang on. \nThere! \nWe didn't do \ntoo much damage \nby the looks of it, Ave. \nWell, the front fender here \ngot a little warble in it. \n[Mike]: I mean, Helen's \nowned that Model T \n25 years. \nI know it's her pride and joy. \nIt'd be kinda cool \nto get it up and running for... \nfor the old girl, \nand kinda surprise her. \nI mean, in the light of day, \nit doesn't look \nthat bad, Ave. \nWhat more can we \nexpect from something \nthat's 90 years old? \nThere's two gerbils \nliving in the radiator. \n[chuckles] \n[Mike]: Well, it's been \na pleasure, Helen. 
\nThank you \nfor everything, \nand it was so nice \nto visit you. \nAnd to meet the kids. \nAnd to meet \nthe kids. \nIt was good to have \nMike and Avery around, \nand got a couple things \ngoing, and... \nthe lambs, \nand what have you, \nand have a few good \nold-fashioned arguments again! \n[laughs] \nHere, why don't \nI hold him \nwhile you have \na tearful goodbye \nwith your \nfavourite child? \n[laughing] \nMmm... \nIt was good \nseein' ya. \nHave a good trip home. \nYeah, you think about, \none of these days, \ngetting your affairs \ntogether, maybe. \nYou know, my mother's \nworked hard her whole life. \nShe needs to enjoy \nthe quality of life \nthat she deserves, \nsell some of her stuff, \nand, uh, lead a normal life... \nWell, I can't go \ntill these guys \nare big enough to ship. \n[Avery]: ...but I guess, to her, \nthis is a normal life. \nYou know, it's kind of \nlike Mike. \nShe comes out here, and goes, \n\"Oh, look at all my treasures!\" \nSo, what do you do? \nShe's happy. \nYou don't want \nto take that home \nfor a shop animal? \nNo. Okay, well, we got \na thousand K to go there, Ave. \nLet's make a mile. \nThanks again, Helen. \n-See ya! \nOh, thank you. \n-Okay. \nYou be good. \nOkay. \nBunch of prickly pears! \n[laughing] \nSee ya, Ave! \n[honking] \nBye! \nSee ya! \nGoodbye to the boys. \nDon't screw up the car! \n[Helen laughing] \nAt Nat Geo, the shark obsession \nruns deep. \n[Cheering] \nSo we had to make shark fest \nbigger than ever. \nWe're gonna dive in and take a \nlook. \n-Woah! \n-This is a shark fest! \n",
"slugline": "Rust-Valley-Restorers",
"firstcreated": "2023-07-06T14:00:01+0000",
"firstpublished": "2023-07-06T14:01:01+0000",
"source": "TV Eyes",
"extra": {
"headline_extended": "A visit to Avery's family farm leads to the discovery of unexpected treasures and puts Mike's veterinary skills to the test. Meanwhile, a new member of the Rust Valley rat rod community needs help rebuilding his dream."
},
"profile": "Story",
"priority": 5,
"subject": [
{
"code": "lifestyle",
"name": "Lifestyle",
"scheme": "tveyes.com"
},
{
"code": "20000565",
"name": "lifestyle",
"scheme": "subject_custom"
}
],
"service": [
{
"code": "g",
"name": "National"
}
],
"charcount": 874,
"wordcount": 151,
"readtime": 1,
"products": [
{
"code": "5f3d90fd77eb2ec9ce5c2a2d",
"name": "Stories"
}
],
"uri": "https://cms.cp.org/contentapi/items/3764e559-835f-4a0a-bdcd-8c32ef75b75f"
}

0 comments on commit 575ebb6

Please sign in to comment.