Adds support for waitrose.com (#869)

hhursev · Sep 23, 2023 · 6b40349 · 6b40349
1 parent 24f0ea6
commit 6b40349
Show file tree

Hide file tree

Showing 5 changed files with 1,400 additions and 0 deletions.
diff --git a/README.rst b/README.rst
@@ -349,6 +349,7 @@ Scrapers available for:
 - `https://www.vegetarbloggen.no/ <https://www.vegetarbloggen.no/>`_
 - `https://vegolosi.it/ <https://vegolosi.it>`_
 - `https://vegrecipesofindia.com/ <https://www.vegrecipesofindia.com/>`_
+- `https://www.waitrose.com/ <https://www.waitrose.com/>`_
 - `https://watchwhatueat.com/ <https://watchwhatueat.com/>`_
 - `https://wearenotmartha.com/ <https://wearenotmartha.com/>`_
 - `https://www.weightwatchers.com/ <https://www.weightwatchers.com/>`_ (*)

diff --git a/recipe_scrapers/__init__.py b/recipe_scrapers/__init__.py
@@ -266,6 +266,7 @@
 from .vegetarbloggen import Vegetarbloggen
 from .vegolosi import Vegolosi
 from .vegrecipesofindia import VegRecipesOfIndia
+from .waitrose import Waitrose
 from .watchwhatueat import WatchWhatUEat
 from .wearenotmartha import WeAreNotMartha
 from .weightwatchers import WeightWatchers
@@ -569,6 +570,7 @@
     VegRecipesOfIndia.host(): VegRecipesOfIndia,
     Vegetarbloggen.host(): Vegetarbloggen,
     Vegolosi.host(): Vegolosi,
+    Waitrose.host(): Waitrose,
     WatchWhatUEat.host(): WatchWhatUEat,
     WeAreNotMartha.host(): WeAreNotMartha,
     WeightWatchers.host(): WeightWatchers,

diff --git a/recipe_scrapers/waitrose.py b/recipe_scrapers/waitrose.py
@@ -0,0 +1,58 @@
+# mypy: allow-untyped-defs
+
+from ._abstract import AbstractScraper
+from ._utils import normalize_string
+
+
+class Waitrose(AbstractScraper):
+    @classmethod
+    def host(cls):
+        return "waitrose.com"
+
+    def author(self):
+        return "waitrose.com"
+
+    def title(self):
+        return self.schema.title()
+
+    def total_time(self):
+        return self.schema.total_time()
+
+    def yields(self):
+        return self.schema.yields()
+
+    def image(self):
+        img_tag = self.soup.find("img", {"itemprop": "image"})
+        if img_tag:
+            url = img_tag.get("src")
+            return url[2:] if url.startswith("//") else url
+
+    def ingredients(self):
+        ingredients_div = self.soup.find("div", {"class": "ingredients"})
+
+        if ingredients_div:
+            ingredient_items = ingredients_div.find_all("li")
+            ingredient_text = [
+                normalize_string(item.get_text())
+                for item in ingredient_items
+                if item.get_text()
+            ]
+            return ingredient_text
+
+    def extract_instructions(self):
+        instructions_div = self.soup.find("div", {"class": "ingredients"})
+
+        if instructions_div:
+            instruction_items = instructions_div.find_all("li")
+            instruction_text = [
+                normalize_string(item.get_text())
+                for item in instruction_items
+                if item.get_text()
+            ]
+            return "\n".join(instruction_text)
+
+    def ratings(self):
+        return self.schema.ratings()
+
+    def description(self):
+        return self.schema.description()