From dd70e0658c946e79dc0c269c5cae10ab78a39e57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?=
two
three" assert lyrics._scrape_merge_paragraphs(text) == "one\ntwo\nthree" - def test_missing_lyrics(self): - lyrics = """ -Lyricsmania staff is working hard for you to add $TITLE lyrics as soon -as they'll be released by $ARTIST, check back soon! -In case you have the lyrics to $TITLE and want to send them to us, fill out -the following form. -""" - assert not google.is_lyrics(lyrics) - def url_to_filename(url): url = re.sub(r"https?://|www.", "", url) @@ -232,17 +206,55 @@ def __call__(self, url, filename=None): LYRICS_ROOT_DIR = os.path.join(_common.RSRC, b"lyrics") -class LyricsGoogleBaseTest(unittest.TestCase): - def setUp(self): - """Set up configuration.""" - try: - __import__("bs4") - except ImportError: - self.skipTest("Beautiful Soup 4 not available") +class LyricsBackendTest(PluginMixin): + plugin = "lyrics" + + @pytest.fixture + def plugin_config(self): + """Return lyrics configuration to test.""" + return {} + + @pytest.fixture + def backend(self, backend_name, plugin_config): + """Set configuration and returns the backend instance.""" + plugin_config["sources"] = [backend_name] + self.config[self.plugin].set(plugin_config) + + lyrics_plugin = lyrics.LyricsPlugin() + return lyrics_plugin.backends[0] + @pytest.mark.integration_test + def test_backend_source(self, backend): + """Test default backends with a song known to exist in respective + databases. + """ + title = "Lady Madonna" + + res = backend.fetch("The Beatles", title) -@pytest.mark.integration_test -class TestSources: + assert res + assert PHRASE_BY_TITLE[title] in res.lower() + + +class TestGoogleLyrics(LyricsBackendTest): + """Test scraping heuristics on a fake html page.""" + + source = dict( + url="http://www.example.com", + artist="John Doe", + title="Beets song", + path="/lyrics/beetssong", + ) + + @pytest.fixture(scope="class") + def backend_name(self): + return "google" + + @pytest.fixture(scope="class") + def plugin_config(self): + return {"google_API_key": "test"} + + @pytest.mark.integration_test @pytest.mark.parametrize( "title, url", [ @@ -272,77 +284,40 @@ class TestSources: ), ], ) - def test_google_source(self, title, url): + def test_backend_source(self, backend, title, url): """Test if lyrics present on websites registered in beets google custom search engine are correctly scraped. """ - response = raw_backend.fetch_url(url) + response = backend.fetch_url(url) result = lyrics.scrape_lyrics_from_html(response).lower() - assert google.is_lyrics(result) + assert backend.is_lyrics(result) assert PHRASE_BY_TITLE[title] in result - @pytest.mark.parametrize( - "backend", - [ - pytest.param(lyrics.Genius, marks=skip_ci), - lyrics.Tekstowo, - lyrics.LRCLib, - # lyrics.MusiXmatch, - ], - ) - def test_backend_source(self, backend): - """Test default backends with a song known to exist in respective - databases. - """ - plugin = lyrics.LyricsPlugin() - backend = backend(plugin.config, plugin._log) - title = "Lady Madonna" - res = backend.fetch("The Beatles", title) - assert PHRASE_BY_TITLE[title] in res.lower() - - -class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest): - """Test scraping heuristics on a fake html page.""" - - source = dict( - url="http://www.example.com", - artist="John Doe", - title="Beets song", - path="/lyrics/beetssong", - ) - - def setUp(self): - """Set up configuration""" - LyricsGoogleBaseTest.setUp(self) - self.plugin = lyrics.LyricsPlugin() - @patch.object(lyrics.Backend, "fetch_url", MockFetchUrl()) - def test_mocked_source_ok(self): + def test_mocked_source_ok(self, backend): """Test that lyrics of the mocked page are correctly scraped""" url = self.source["url"] + self.source["path"] - res = lyrics.scrape_lyrics_from_html(raw_backend.fetch_url(url)) - assert google.is_lyrics(res), url + res = lyrics.scrape_lyrics_from_html(backend.fetch_url(url)) + assert backend.is_lyrics(res), url assert PHRASE_BY_TITLE[self.source["title"]] in res.lower() @patch.object(lyrics.Backend, "fetch_url", MockFetchUrl()) - def test_is_page_candidate_exact_match(self): + def test_is_page_candidate_exact_match(self, backend): """Test matching html page title with song infos -- when song infos are present in the title. """ - from bs4 import BeautifulSoup, SoupStrainer - s = self.source url = str(s["url"] + s["path"]) - html = raw_backend.fetch_url(url) + html = backend.fetch_url(url) soup = BeautifulSoup( html, "html.parser", parse_only=SoupStrainer("title") ) - assert google.is_page_candidate( + assert backend.is_page_candidate( url, soup.title.string, s["title"], s["artist"] ), url - def test_is_page_candidate_fuzzy_match(self): + def test_is_page_candidate_fuzzy_match(self, backend): """Test matching html page title with song infos -- when song infos are not present in the title. """ @@ -351,16 +326,16 @@ def test_is_page_candidate_fuzzy_match(self): url_title = "example.com | Beats song by John doe" # very small diffs (typo) are ok eg 'beats' vs 'beets' with same artist - assert google.is_page_candidate( + assert backend.is_page_candidate( url, url_title, s["title"], s["artist"] ), url # reject different title url_title = "example.com | seets bong lyrics by John doe" - assert not google.is_page_candidate( + assert not backend.is_page_candidate( url, url_title, s["title"], s["artist"] ), url - def test_is_page_candidate_special_chars(self): + def test_is_page_candidate_special_chars(self, backend): """Ensure that `is_page_candidate` doesn't crash when the artist and such contain special regular expression characters. """ @@ -369,30 +344,45 @@ def test_is_page_candidate_special_chars(self): url = s["url"] + s["path"] url_title = "foo" - google.is_page_candidate(url, url_title, s["title"], "Sunn O)))") - + backend.is_page_candidate(url, url_title, s["title"], "Sunn O)))") -# test Genius backend + def test_is_lyrics(self, backend): + texts = ["LyricsMania.com - Copyright (c) 2013 - All Rights Reserved"] + texts += [ + """All material found on this site is property\n + of mywickedsongtext brand""" + ] + for t in texts: + assert not backend.is_lyrics(t) + def test_slugify(self, backend): + text = "http://site.com/\xe7afe-au_lait(boisson)" + assert backend.slugify(text) == "http://site.com/cafe_au_lait" -class GeniusBaseTest(unittest.TestCase): - def setUp(self): - """Set up configuration.""" - try: - __import__("bs4") - except ImportError: - self.skipTest("Beautiful Soup 4 not available") + def test_missing_lyrics(self, backend): + lyrics = """ +Lyricsmania staff is working hard for you to add $TITLE lyrics as soon +as they'll be released by $ARTIST, check back soon! +In case you have the lyrics to $TITLE and want to send them to us, fill out +the following form. +""" + assert not backend.is_lyrics(lyrics) -class GeniusScrapeLyricsFromHtmlTest(GeniusBaseTest): - """tests Genius._scrape_lyrics_from_html()""" +class TestGeniusLyrics(LyricsBackendTest): + @pytest.fixture(scope="class") + def backend_name(self): + return "genius" - def setUp(self): - """Set up configuration""" - GeniusBaseTest.setUp(self) - self.plugin = lyrics.LyricsPlugin() + @pytest.mark.xfail( + bool(os.environ.get("GITHUB_ACTIONS")), + reason="Genius returns 403 FORBIDDEN in CI", + raises=AssertionError, + ) + def test_backend_source(self, backend): + super().test_backend_source(backend) - def test_no_lyrics_div(self): + def test_no_lyrics_div(self, backend): """Ensure we don't crash when the scraping the html for a genius page doesn't contain
""" @@ -400,38 +390,27 @@ def test_no_lyrics_div(self): # expected return value None url = "https://genius.com/sample" mock = MockFetchUrl() - assert genius._scrape_lyrics_from_html(mock(url)) is None + assert backend._scrape_lyrics_from_html(mock(url)) is None - def test_good_lyrics(self): + def test_good_lyrics(self, backend): """Ensure we are able to scrape a page with lyrics""" url = "https://genius.com/Ttng-chinchilla-lyrics" mock = MockFetchUrl() - lyrics = genius._scrape_lyrics_from_html(mock(url)) + lyrics = backend._scrape_lyrics_from_html(mock(url)) assert lyrics is not None assert lyrics.count("\n") == 28 - def test_good_lyrics_multiple_divs(self): + def test_good_lyrics_multiple_divs(self, backend): """Ensure we are able to scrape a page with lyrics""" url = "https://genius.com/2pac-all-eyez-on-me-lyrics" mock = MockFetchUrl() - lyrics = genius._scrape_lyrics_from_html(mock(url)) + lyrics = backend._scrape_lyrics_from_html(mock(url)) assert lyrics is not None assert lyrics.count("\n") == 133 - # TODO: find an example of a lyrics page with multiple divs and test it - - -class GeniusFetchTest(GeniusBaseTest): - """tests Genius.fetch()""" - - def setUp(self): - """Set up configuration""" - GeniusBaseTest.setUp(self) - self.plugin = lyrics.LyricsPlugin() - @patch.object(lyrics.Genius, "_scrape_lyrics_from_html") @patch.object(lyrics.Backend, "fetch_url", return_value=True) - def test_json(self, mock_fetch_url, mock_scrape): + def test_json(self, mock_fetch_url, mock_scrape, backend): """Ensure we're finding artist matches""" with patch.object( lyrics.Genius, @@ -459,51 +438,35 @@ def test_json(self, mock_fetch_url, mock_scrape): ) as mock_json: # genius uses zero-width-spaces (\u200B) for lowercase # artists so we make sure we can match those - assert genius.fetch("blackbear", "Idfc") is not None + assert backend.fetch("blackbear", "Idfc") is not None mock_fetch_url.assert_called_once_with("blackbear_url") mock_scrape.assert_called_once_with(True) # genius uses the hyphen minus (\u002D) as their dash - assert genius.fetch("El-p", "Idfc") is not None + assert backend.fetch("El-p", "Idfc") is not None mock_fetch_url.assert_called_with("El-p_url") mock_scrape.assert_called_with(True) # test no matching artist - assert genius.fetch("doesntexist", "none") is None + assert backend.fetch("doesntexist", "none") is None # test invalid json mock_json.return_value = None - assert genius.fetch("blackbear", "Idfc") is None + assert backend.fetch("blackbear", "Idfc") is None -# test Tekstowo +class TestTekstowoLyrics(LyricsBackendTest): + @pytest.fixture(scope="class") + def backend_name(self): + return "tekstowo" - -class TekstowoBaseTest(unittest.TestCase): - def setUp(self): - """Set up configuration.""" - try: - __import__("bs4") - except ImportError: - self.skipTest("Beautiful Soup 4 not available") - - -class TekstowoExtractLyricsTest(TekstowoBaseTest): - """tests Tekstowo.extract_lyrics()""" - - def setUp(self): - """Set up configuration""" - TekstowoBaseTest.setUp(self) - self.plugin = lyrics.LyricsPlugin() - tekstowo.config = self.plugin.config - - def test_good_lyrics(self): + def test_good_lyrics(self, backend): """Ensure we are able to scrape a page with lyrics""" url = "https://www.tekstowo.pl/piosenka,24kgoldn,city_of_angels_1.html" mock = MockFetchUrl() - assert tekstowo.extract_lyrics(mock(url)) + assert backend.extract_lyrics(mock(url)) - def test_no_lyrics(self): + def test_no_lyrics(self, backend): """Ensure we don't crash when the scraping the html for a Tekstowo page doesn't contain lyrics """ @@ -512,19 +475,29 @@ def test_no_lyrics(self): "beethoven_piano_sonata_17_tempest_the_3rd_movement.html" ) mock = MockFetchUrl() - assert not tekstowo.extract_lyrics(mock(url)) - + assert not backend.extract_lyrics(mock(url)) -# test LRCLib backend +class TestLRCLibLyrics(LyricsBackendTest): + @pytest.fixture(scope="class") + def backend_name(self): + return "lrclib" -class LRCLibLyricsTest(unittest.TestCase): - def setUp(self): - self.plugin = lyrics.LyricsPlugin() - lrclib.config = self.plugin.config + @pytest.fixture + def mock_get(self): + with patch("beetsplug.lyrics.requests.get") as mock: + yield mock - @patch("beetsplug.lyrics.requests.get") - def test_fetch_synced_lyrics(self, mock_get): + @pytest.mark.parametrize( + "plugin_config, expected_lyrics_type", + [ + ({"synced": True}, "syncedLyrics"), + ({"synced": False}, "plainLyrics"), + ], + ) + def test_synced_config_option( + self, backend, mock_get, expected_lyrics_type + ): mock_response = { "syncedLyrics": "[00:00.00] la la la", "plainLyrics": "la la la", @@ -532,15 +505,10 @@ def test_fetch_synced_lyrics(self, mock_get): mock_get.return_value.json.return_value = mock_response mock_get.return_value.status_code = 200 - lyrics = lrclib.fetch("la", "la", "la", 999) - assert lyrics == mock_response["plainLyrics"] - - self.plugin.config["synced"] = True - lyrics = lrclib.fetch("la", "la", "la", 999) - assert lyrics == mock_response["syncedLyrics"] + lyrics = backend.fetch("la", "la", "la", 999) + assert lyrics == mock_response[expected_lyrics_type] - @patch("beetsplug.lyrics.requests.get") - def test_fetch_plain_lyrics(self, mock_get): + def test_fetch_plain_lyrics(self, backend, mock_get): mock_response = { "syncedLyrics": "", "plainLyrics": "la la la", @@ -548,12 +516,11 @@ def test_fetch_plain_lyrics(self, mock_get): mock_get.return_value.json.return_value = mock_response mock_get.return_value.status_code = 200 - lyrics = lrclib.fetch("la", "la", "la", 999) + lyrics = backend.fetch("la", "la", "la", 999) assert lyrics == mock_response["plainLyrics"] - @patch("beetsplug.lyrics.requests.get") - def test_fetch_not_found(self, mock_get): + def test_fetch_not_found(self, backend, mock_get): mock_response = { "statusCode": 404, "error": "Not Found", @@ -562,15 +529,14 @@ def test_fetch_not_found(self, mock_get): mock_get.return_value.json.return_value = mock_response mock_get.return_value.status_code = 404 - lyrics = lrclib.fetch("la", "la", "la", 999) + lyrics = backend.fetch("la", "la", "la", 999) assert lyrics is None - @patch("beetsplug.lyrics.requests.get") - def test_fetch_exception(self, mock_get): + def test_fetch_exception(self, backend, mock_get): mock_get.side_effect = requests.RequestException - lyrics = lrclib.fetch("la", "la", "la", 999) + lyrics = backend.fetch("la", "la", "la", 999) assert lyrics is None