Skip to content

Commit

Permalink
Adds support for waitrose.com (#869)
Browse files Browse the repository at this point in the history
  • Loading branch information
jknndy authored Sep 23, 2023
1 parent 24f0ea6 commit 6b40349
Show file tree
Hide file tree
Showing 5 changed files with 1,400 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ Scrapers available for:
- `https://www.vegetarbloggen.no/ <https://www.vegetarbloggen.no/>`_
- `https://vegolosi.it/ <https://vegolosi.it>`_
- `https://vegrecipesofindia.com/ <https://www.vegrecipesofindia.com/>`_
- `https://www.waitrose.com/ <https://www.waitrose.com/>`_
- `https://watchwhatueat.com/ <https://watchwhatueat.com/>`_
- `https://wearenotmartha.com/ <https://wearenotmartha.com/>`_
- `https://www.weightwatchers.com/ <https://www.weightwatchers.com/>`_ (*)
Expand Down
2 changes: 2 additions & 0 deletions recipe_scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@
from .vegetarbloggen import Vegetarbloggen
from .vegolosi import Vegolosi
from .vegrecipesofindia import VegRecipesOfIndia
from .waitrose import Waitrose
from .watchwhatueat import WatchWhatUEat
from .wearenotmartha import WeAreNotMartha
from .weightwatchers import WeightWatchers
Expand Down Expand Up @@ -569,6 +570,7 @@
VegRecipesOfIndia.host(): VegRecipesOfIndia,
Vegetarbloggen.host(): Vegetarbloggen,
Vegolosi.host(): Vegolosi,
Waitrose.host(): Waitrose,
WatchWhatUEat.host(): WatchWhatUEat,
WeAreNotMartha.host(): WeAreNotMartha,
WeightWatchers.host(): WeightWatchers,
Expand Down
58 changes: 58 additions & 0 deletions recipe_scrapers/waitrose.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# mypy: allow-untyped-defs

from ._abstract import AbstractScraper
from ._utils import normalize_string


class Waitrose(AbstractScraper):
    """Scraper for recipe pages on waitrose.com.

    Title, total time, yields, ratings and description are delegated to
    ``self.schema``; the image and the ingredient list are read from the
    page markup through ``self.soup``.
    """

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "waitrose.com"

    def author(self):
        """Return the author, which is always the site name."""
        return "waitrose.com"

    def title(self):
        """Return the recipe title from the schema data."""
        return self.schema.title()

    def total_time(self):
        """Return the total time from the schema data."""
        return self.schema.total_time()

    def yields(self):
        """Return the yields from the schema data."""
        return self.schema.yields()

    def image(self):
        """Return the recipe image URL, or ``None`` when it is unavailable.

        The URL is taken from the ``src`` of the first
        ``<img itemprop="image">``. A leading ``//`` is stripped from it.
        """
        img_tag = self.soup.find("img", {"itemprop": "image"})
        if not img_tag:
            return None

        url = img_tag.get("src")
        if url is None:
            # An <img> without a ``src`` attribute used to raise
            # AttributeError on ``None.startswith``.
            return None
        return url[2:] if url.startswith("//") else url

    @staticmethod
    def _list_item_texts(container):
        """Return the normalized, non-empty text of each ``<li>`` in *container*."""
        normalized = (
            normalize_string(item.get_text()) for item in container.find_all("li")
        )
        # Filter on the normalized text so items that normalize to an empty
        # string do not end up as empty entries.
        return [text for text in normalized if text]

    def ingredients(self):
        """Return the ingredient lines found in ``<div class="ingredients">``.

        Returns ``None`` when the page has no such container.
        """
        ingredients_div = self.soup.find("div", {"class": "ingredients"})
        if not ingredients_div:
            return None
        return self._list_item_texts(ingredients_div)

    def extract_instructions(self):
        """Return the list items of the instructions container joined by newlines.

        Returns ``None`` when the container is missing.
        """
        # NOTE(review): this looks up the same ``div.ingredients`` container
        # as ``ingredients()``, which looks like a copy-paste slip. The right
        # selector for the method steps cannot be determined from this file;
        # confirm against a real waitrose.com recipe page before changing it.
        instructions_div = self.soup.find("div", {"class": "ingredients"})
        if not instructions_div:
            return None
        return "\n".join(self._list_item_texts(instructions_div))

    def ratings(self):
        """Return the ratings from the schema data."""
        return self.schema.ratings()

    def description(self):
        """Return the description from the schema data."""
        return self.schema.description()
Loading

0 comments on commit 6b40349

Please sign in to comment.