Skip to content

Commit

Permalink
Update the recipe to skip ingredient that was already in the soup
Browse files: browse the repository at this point in the history
  • Loading branch information
snejus committed Sep 19, 2024
1 parent 54a841b commit 87f674f
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 25 deletions.
14 changes: 3 additions & 11 deletions beetsplug/lyrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,6 @@
from beets import plugins, ui
from beets.autotag.hooks import string_dist

DIV_RE = re.compile(r"<(/?)div>?", re.I)
COMMENT_RE = re.compile(r"<!--.*-->", re.S)
TAG_RE = re.compile(r"<[^>]*>")
BREAK_RE = re.compile(r"\n?\s*<br([\s|/][^>]*)*>\s*\n?", re.I)
USER_AGENT = f"beets/{beets.__version__}"

Expand Down Expand Up @@ -592,7 +589,7 @@ def remove_credits(text):
return text


def _scrape_strip_cruft(html, plain_text_out=False):
def _scrape_strip_cruft(html: str) -> str:
"""Clean up HTML"""
html = unescape(html)

Expand All @@ -604,13 +601,8 @@ def _scrape_strip_cruft(html, plain_text_out=False):
html = re.sub("<aside .+?</aside>", "", html) # remove Google Ads tags
html = re.sub(r"</?(em|strong)[^>]*>", "", html) # remove bold / italics

if plain_text_out: # Strip remaining HTML tags
html = COMMENT_RE.sub("", html)
html = TAG_RE.sub("", html)

html = "\n".join([x.strip() for x in html.strip().split("\n")])
html = re.sub(r"\n{3,}", r"\n\n", html)
return html
return re.sub(r"\n{3,}", r"\n\n", html)


def _scrape_merge_paragraphs(html):
Expand Down Expand Up @@ -1045,7 +1037,7 @@ def get_lyrics(self, artist, title, album=None, length=None):
if lyrics := backend.fetch(
artist, title, album=album, length=length
):
return _scrape_strip_cruft(lyrics, True)
return lyrics

def append_translation(self, text, to_lang):
from xml.etree import ElementTree
Expand Down
15 changes: 1 addition & 14 deletions test/plugins/test_lyrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,22 +151,9 @@ def test_remove_credits(self):
if the beat aint crackin"""
assert lyrics.remove_credits(text) == text

def test_scrape_strip_cruft(self):
text = """<!--lyrics below-->
&nbsp;one
<br class='myclass'>
two !
<br><br \\>
<blink>four</blink>"""
assert lyrics._scrape_strip_cruft(text, True) == "one\ntwo !\n\nfour"

def test_scrape_strip_scripts(self):
text = """foo<script>bar</script>baz"""
assert lyrics._scrape_strip_cruft(text, True) == "foobaz"

def test_scrape_strip_tag_in_comment(self):
text = """foo<!--<bar>-->qux"""
assert lyrics._scrape_strip_cruft(text, True) == "fooqux"
assert lyrics._scrape_strip_cruft(text) == "foobaz"

def test_scrape_merge_paragraphs(self):
text = "one</p> <p class='myclass'>two</p><p>three"
Expand Down

0 comments on commit 87f674f

Please sign in to comment.