diff --git a/parsel/selector.py b/parsel/selector.py index b84b0308..1fa041f6 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -200,30 +200,36 @@ def re_first( return el return default - def getall(self) -> List[str]: + def getall(self, *, strip: bool = False) -> List[str]: """ Call the ``.get()`` method for each element is this list and return their results flattened, as a list of strings. """ - return [x.get() for x in self] + data = [x.get() for x in self] + if strip: + return [x.strip() if x else x for x in data] + return data extract = getall @typing.overload - def get(self, default: None = None) -> Optional[str]: + def get(self, default: None = None, strip: bool = ...) -> Optional[str]: pass @typing.overload - def get(self, default: str) -> str: + def get(self, default: str, strip: bool = ...) -> str: pass - def get(self, default: Optional[str] = None) -> Optional[str]: + def get( + self, default: Optional[str] = None, strip: bool = False + ) -> Optional[str]: """ Return the result of ``.get()`` for the first element in this list. If the list is empty, return the default value. """ for x in self: - return x.get() + value = x.get() + return value.strip() if strip and value else value return default extract_first = get diff --git a/tests/test_selector.py b/tests/test_selector.py index d0bb2816..6d57cbac 100644 --- a/tests/test_selector.py +++ b/tests/test_selector.py @@ -323,6 +323,53 @@ def test_selectorlist_get_alias(self) -> None: self.assertEqual(sel.xpath("//ul/li").get(), '
  • 1
  • ') self.assertEqual(sel.xpath("//ul/li/text()").get(), "1") + def test_selector_get_strip(self) -> None: + body = '' + sel = self.sscls(text=body) + + self.assertEqual( + sel.xpath("//ul/li[position()>1]").get(), '
  • 2
  • ' + ) + self.assertEqual( + sel.xpath("//ul/li[position()>1]").get(strip=True), + '
  • 2
  • ', + ) + self.assertEqual( + sel.xpath("//ul/li[position()>1]/text()").get(), " 2 " + ) + self.assertEqual( + sel.xpath("//ul/li[position()>1]/text()").get(strip=True), "2" + ) + + def test_selector_getall_strip(self) -> None: + body = ( + '' + ) + sel = self.sscls(text=body) + + self.assertEqual( + sel.xpath("//ul/li").getall(), + [ + '
  • 1
  • ', + '
  • 2
  • ', + '
  • 3
  • ', + ], + ) + self.assertEqual( + sel.xpath("//ul/li").getall(strip=True), + [ + '
  • 1
  • ', + '
  • 2
  • ', + '
  • 3
  • ', + ], + ) + self.assertEqual( + sel.xpath("//ul/li/text()").getall(), ["1", " 2 ", " 3"] + ) + self.assertEqual( + sel.xpath("//ul/li/text()").getall(strip=True), ["1", "2", "3"] + ) + def test_re_first(self) -> None: """Test if re_first() returns first matched element""" body = ''