-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CPCN-27] CP Transcripts ingest parser (#187)
- Loading branch information
1 parent
5dab4bd
commit 575ebb6
Showing
5 changed files
with
128 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
from typing import Dict, Any, Optional | ||
|
||
from superdesk import get_resource_service | ||
from superdesk.io.feed_parsers.ninjs import NINJSFeedParser | ||
from superdesk.text_utils import plain_text_to_html | ||
|
||
|
||
def get_previous_version(original_ingest_id: str, version_number: int) -> Optional[Dict[str, Any]]:
    """Return the closest earlier version of an item stored in the archive.

    Candidate ingest ids are built as ``<original_ingest_id>.<n>`` and looked
    up in the ``archive`` resource, with ``n`` counting down from
    ``version_number`` to 1. The first document found wins.

    :param original_ingest_id: ingest id without the trailing version suffix
    :param version_number: highest version to try; nothing is looked up when
        it is lower than 1
    :return: the matching archive document, or ``None`` when no candidate exists
    """
    for candidate_version in range(version_number, 0, -1):
        candidate = get_resource_service("archive").find_one(
            req=None,
            ingest_id=f"{original_ingest_id}.{candidate_version}",
        )
        if candidate is not None:
            return candidate
    return None
|
||
|
||
class CPTranscriptsFeedParser(NINJSFeedParser):
    """NINJS feed parser for the CP Transcripts feed.

    Each delivered version of a transcript becomes its own item: the version
    number is appended to the guid, and the item is linked through
    ``rewrite_of`` to the closest earlier version found in the archive.
    """

    NAME = "cp_transcripts"
    label = "CP Transcripts"

    def _transform_from_ninjs(self, ninjs: Dict[str, Any]):
        """Convert one ninjs payload into an ingest item.

        :param ninjs: decoded ninjs document; ``guid`` and ``version`` are
            required, and ``guid`` is rewritten in place to
            ``<guid>.<version>`` before the parent parser runs
        :return: the item built by the parent parser, extended with
            ``version``, an HTML body, transcript flags under ``extra`` and,
            when an earlier version exists, ``rewrite_of``
        :raises KeyError: if ``guid`` or ``version`` is missing
        :raises ValueError: if ``version`` cannot be converted to ``int``
        """
        original_guid = ninjs["guid"]
        version = int(ninjs["version"])

        # The parent reads the guid from the payload, so the versioned guid
        # has to be written back into it before delegating.
        ninjs["guid"] = f"{original_guid}.{version}"
        item = super()._transform_from_ninjs(ninjs)
        item["version"] = version

        # Only convert a body that is actually there, instead of failing with
        # KeyError on a payload that has none.
        # NOTE(review): assumes the parent omits absent fields from the item -
        # confirm against NINJSFeedParser.
        if "body_html" in item:
            item["body_html"] = plain_text_to_html(item["body_html"])

        item.setdefault("extra", {}).update(
            {
                "publish_ingest_id_as_guid": True,
                "cp_version": version,
                "type": "transcript",
            }
        )

        # Link to the closest earlier version already in the archive, if any.
        previous_item = get_previous_version(original_guid, version - 1)
        if previous_item is not None:
            item["rewrite_of"] = previous_item["ingest_id"]
        return item
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import unittest | ||
from unittest.mock import patch | ||
|
||
import flask | ||
import superdesk | ||
|
||
from cp.ingest import CPTranscriptsFeedParser | ||
|
||
from tests.ingest.parser import get_fixture_path | ||
from tests.mock import resources | ||
|
||
|
||
# Shared by the test below: an empty provider dict and a single parser instance.
provider = {}
parser = CPTranscriptsFeedParser()


class CP_Transcripts_ParseTestCase(unittest.TestCase):
    """Parse the ``cp_transcripts.json`` fixture and check the resulting item."""

    app = flask.Flask(__name__)

    def test_parse(self):
        """Version 2 of a transcript is parsed and linked to archived version 1."""
        with self.app.app_context(), patch.dict(superdesk.resources, resources):
            find_one = superdesk.resources["archive"].service.find_one
            # The archive lookup for the previous version returns version 1.
            find_one.side_effect = [
                {"ingest_id": "d3c8487a-1757-4dde-8bb5-22ca166c1e67.1", "version": 2, "extra": {"ap_version": 999}},
            ]
            try:
                items = parser.parse(get_fixture_path("cp_transcripts.json", "cp_transcripts"), provider)
            finally:
                # Always reset the mock, even when parsing fails, so the canned
                # response cannot leak into other tests using the same resources.
                find_one.side_effect = None

        item = items[0]
        self.assertEqual("text", item["type"])
        self.assertEqual("transcript", item["extra"]["type"])
        self.assertEqual(True, item["extra"]["publish_ingest_id_as_guid"])
        self.assertEqual(2, item["extra"]["cp_version"])
        self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.2", item["guid"])
        self.assertEqual(2, item["version"])
        self.assertEqual("d3c8487a-1757-4dde-8bb5-22ca166c1e67.1", item["rewrite_of"])
        self.assertTrue(item["body_html"].startswith("<p>laying around"))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
48 changes: 48 additions & 0 deletions
48
server/tests/ingest/parser/fixtures/cp_transcripts/cp_transcripts.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
{ | ||
"guid": "d3c8487a-1757-4dde-8bb5-22ca166c1e67", | ||
"version": "2", | ||
"type": "text", | ||
"located": "Toronto, ON", | ||
"language": "en-CA", | ||
"headline": "History TV - Rust Valley Restorers, 7/6/2023 2:00:00 PM UTC - Segment #2", | ||
"urgency": 3, | ||
"pubstatus": "usable", | ||
"body_html": "laying around \nmy mom's property. \nSome sheep \nmight wander in here \nand blow itself up. \nLet's see, is this \ngonna work, Ave? \nIt's a pretty \nheavy barrel. \nPerfect! \n[Narrator]: \nUsing an old rusty barrel \nand an empty ice cream pail... \nBring it over here, Shaf, \nwe're gonna stir it up. \n[Narrator]: Mike and Shafin \nprepare the tannerite \nfor target practice. \nOkay, now, \nwe should probably \njust add one of these \nat a time, eh? \n[Mike]: It's not really \nclassified as an explosive, \nbut when you hit it \nwith a bullet, \nit does explode, \nor it makes enough gas \nthat it's almost an explosive, \nbut whatever, \nit can hurt you bad. \nWatching at home, \nchildren, \ndo not do this. \n[Mike]: It's dangerous stuff \nin the hands \nof the wrong person, \nlike Shafin. \nShe's a-gonna \ngo boom. \n[cackling] \nOkay. \nGood? \nGuys, stay behind. \n[shoots] \n[laughing wildly] \nYou missed! \nYour turn. \nOkay, \nlet's do it! \n[shoots] \nOkay, Ave, \nyour turn! \nIf you aim for the white, \nyou'll hit it. \n[shoots] \n[bleep] \n[Mike]: Sure you don't wanna \ntake a crack, Helen? \nOh, I could probably try. \n[Avery]: Give a poke \nat 'er there, Grandma. \nPut the bullet in that. \nOkay. \n[shoots] \nWhoo-hoo-hoo! \nHoly [bleep]! \n[Shafin laughing] Oh! \nDid you see that piece \ngo flyin'? \n[laughing in delight] \n[Avery]: I'm fairly impressed. \nMy mother, \nafter all these years, \nshe's still \nan incredibly good shot. \nShe's one tough lady. \nWhoo-hoo! Whoo! \n[Mike]: \nLet's go check it out. \nI think that worked quite well. \nThat stuff is potent. \nHoly guacamole! \nMan! \nHere's my mom. \nDead shot, she is, eh? \nYeah. \nShe blew 'er apart \npretty damn good, man. \n[laughing] \nLook, Ave. Watch. \n[Narrator]: Mike and Avery \nhave one more thing to do \nbefore they hit the road... \n[Mike]: I've noticed \nthe structural defects \nof this building. \nVoilà! 
\n[Narrator]: ...removing \nthe fragile Model T \nfrom the shed \nwithout damaging it. \nBack up the trailer, \ncome along, and on. \nYou just wrecked \nmy family heritage. \n-Well, we'll fix it. \n-[cackling] \nOkay, line me up \nthere, big feller. \nHow much further? \nGood enough. \nWe got one Mike-power \non the winch here. \nShouldn't be a problem. \nWell, there ya go. \nI think we gotta bounce it \nover an inch, Ave. \n[Mike]: It's very important \nto be careful \ngetting this thing out. \nThe thing's basically \nnine-tenths \nof a hundred years old. \nJust keep going! \nWe're aren't gonna \nhurt that fender? \nNo! \n[Mike]: We don't want \nto scratch it, \nwe don't wanna dent it. \nWe just wanna get it back \nto where we can do \nsomething with it. \nThat's looking \npretty close. \nIt is. \nIt's jammed on the fender, \nand it's jammed on... \nKeep going. \nI don't wanna \nwreck it, Ave. \nAve, we're bending \nthe window! \nNo, we're not. \nKeep going! \nIsn't the fender rubbing? \n-Keep pulling! \n-Okay! \n-[crunch] \n-What was that? \nOh, the steering \njust broke on 'er. \nWhat? Really? \nWhat did you \njust-- Avery. \nI don't know, \nsomething just happened \nin the steering box. \nYou're not capable \nof being gentle. \n[Mike]: \nAvery's always in a hurry. \nI don't know what for, \nbut why rush to cause more work? \nHere. \nHang on. \nOh, it's stuck-- \nHang on. \nThere! \nWe didn't do \ntoo much damage \nby the looks of it, Ave. \nWell, the front fender here \ngot a little warble in it. \n[Mike]: I mean, Helen's \nowned that Model T \n25 years. \nI know it's her pride and joy. \nIt'd be kinda cool \nto get it up and running for... \nfor the old girl, \nand kinda surprise her. \nI mean, in the light of day, \nit doesn't look \nthat bad, Ave. \nWhat more can we \nexpect from something \nthat's 90 years old? \nThere's two gerbils \nliving in the radiator. \n[chuckles] \n[Mike]: Well, it's been \na pleasure, Helen. 
\nThank you \nfor everything, \nand it was so nice \nto visit you. \nAnd to meet the kids. \nAnd to meet \nthe kids. \nIt was good to have \nMike and Avery around, \nand got a couple things \ngoing, and... \nthe lambs, \nand what have you, \nand have a few good \nold-fashioned arguments again! \n[laughs] \nHere, why don't \nI hold him \nwhile you have \na tearful goodbye \nwith your \nfavourite child? \n[laughing] \nMmm... \nIt was good \nseein' ya. \nHave a good trip home. \nYeah, you think about, \none of these days, \ngetting your affairs \ntogether, maybe. \nYou know, my mother's \nworked hard her whole life. \nShe needs to enjoy \nthe quality of life \nthat she deserves, \nsell some of her stuff, \nand, uh, lead a normal life... \nWell, I can't go \ntill these guys \nare big enough to ship. \n[Avery]: ...but I guess, to her, \nthis is a normal life. \nYou know, it's kind of \nlike Mike. \nShe comes out here, and goes, \n\"Oh, look at all my treasures!\" \nSo, what do you do? \nShe's happy. \nYou don't want \nto take that home \nfor a shop animal? \nNo. Okay, well, we got \na thousand K to go there, Ave. \nLet's make a mile. \nThanks again, Helen. \n-See ya! \nOh, thank you. \n-Okay. \nYou be good. \nOkay. \nBunch of prickly pears! \n[laughing] \nSee ya, Ave! \n[honking] \nBye! \nSee ya! \nGoodbye to the boys. \nDon't screw up the car! \n[Helen laughing] \nAt Nat Geo, the shark obsession \nruns deep. \n[Cheering] \nSo we had to make shark fest \nbigger than ever. \nWe're gonna dive in and take a \nlook. \n-Woah! \n-This is a shark fest! \n", | ||
"slugline": "Rust-Valley-Restorers", | ||
"firstcreated": "2023-07-06T14:00:01+0000", | ||
"firstpublished": "2023-07-06T14:01:01+0000", | ||
"source": "TV Eyes", | ||
"extra": { | ||
"headline_extended": "A visit to Avery's family farm leads to the discovery of unexpected treasures and puts Mike's veterinary skills to the test. Meanwhile, a new member of the Rust Valley rat rod community needs help rebuilding his dream." | ||
}, | ||
"profile": "Story", | ||
"priority": 5, | ||
"subject": [ | ||
{ | ||
"code": "lifestyle", | ||
"name": "Lifestyle", | ||
"scheme": "tveyes.com" | ||
}, | ||
{ | ||
"code": "20000565", | ||
"name": "lifestyle", | ||
"scheme": "subject_custom" | ||
} | ||
], | ||
"service": [ | ||
{ | ||
"code": "g", | ||
"name": "National" | ||
} | ||
], | ||
"charcount": 874, | ||
"wordcount": 151, | ||
"readtime": 1, | ||
"products": [ | ||
{ | ||
"code": "5f3d90fd77eb2ec9ce5c2a2d", | ||
"name": "Stories" | ||
} | ||
], | ||
"uri": "https://cms.cp.org/contentapi/items/3764e559-835f-4a0a-bdcd-8c32ef75b75f" | ||
} |