diff --git a/parsel/selector.py b/parsel/selector.py
index b84b0308..3df09333 100644
--- a/parsel/selector.py
+++ b/parsel/selector.py
@@ -200,12 +200,18 @@ def re_first(
             return el
         return default
 
-    def getall(self) -> List[str]:
+    def getall(self, strip: bool = False) -> List[str]:
         """
-        Call the ``.get()`` method for each element is this list and return
+        Call the ``.get()`` method for each element in this list and return
         their results flattened, as a list of strings.
+
+        If ``strip`` is true, leading and trailing whitespace is removed
+        from every non-empty result.
         """
-        return [x.get() for x in self]
+        data = [x.get() for x in self]
+        if strip:
+            return [x.strip() if x else x for x in data]
+        return data
 
     extract = getall
 
@@ -217,13 +223,27 @@ def get(self, default: None = None) -> Optional[str]:
     def get(self, default: str) -> str:
         pass
 
-    def get(self, default: Optional[str] = None) -> Optional[str]:
+    @typing.overload
+    def get(self, default: None = None, *, strip: bool) -> Optional[str]:
+        pass
+
+    @typing.overload
+    def get(self, default: str, *, strip: bool) -> str:
+        pass
+
+    def get(
+        self, default: Optional[str] = None, strip: bool = False
+    ) -> Optional[str]:
         """
         Return the result of ``.get()`` for the first element in this list.
         If the list is empty, return the default value.
+
+        If ``strip`` is true, leading and trailing whitespace is removed
+        from a non-empty result; the default value is returned unchanged.
         """
         for x in self:
-            return x.get()
+            value = x.get()
+            return value.strip() if strip and value else value
         return default
 
     extract_first = get
diff --git a/tests/test_selector.py b/tests/test_selector.py
index d0bb2816..2afb1882 100644
--- a/tests/test_selector.py
+++ b/tests/test_selector.py
@@ -90,7 +90,7 @@ def test_simple_selection(self) -> None:
     def test_simple_selection_with_variables(self) -> None:
         """Using XPath variables"""
         body = "

" - sel = self.sscls(text=body) + sel = self.sscls(text=body) # Selector doesnt have default field self.assertEqual( [ @@ -323,6 +323,53 @@ def test_selectorlist_get_alias(self) -> None: self.assertEqual(sel.xpath("//ul/li").get(), '
  • 1
  • ') self.assertEqual(sel.xpath("//ul/li/text()").get(), "1") + def test_selector_get_strip(self) -> None: + body = '' + sel = self.sscls(text=body) + + self.assertEqual( + sel.xpath("//ul/li[position()>1]").get(), '
  • 2
  • ' + ) + self.assertEqual( + sel.xpath("//ul/li[position()>1]").get(strip=True), + '
  • 2
  • ', + ) + self.assertEqual( + sel.xpath("//ul/li[position()>1]/text()").get(), " 2 " + ) + self.assertEqual( + sel.xpath("//ul/li[position()>1]/text()").get(strip=True), "2" + ) + + def test_selector_getall_strip(self) -> None: + body = ( + '' + ) + sel = self.sscls(text=body) + + self.assertEqual( + sel.xpath("//ul/li").getall(), + [ + '
  • 1
  • ', + '
  • 2
  • ', + '
  • 3
  • ', + ], + ) + self.assertEqual( + sel.xpath("//ul/li").getall(strip=True), + [ + '
  • 1
  • ', + '
  • 2
  • ', + '
  • 3
  • ', + ], + ) + self.assertEqual( + sel.xpath("//ul/li/text()").getall(), ["1", " 2 ", " 3"] + ) + self.assertEqual( + sel.xpath("//ul/li/text()").getall(strip=True), ["1", "2", "3"] + ) + def test_re_first(self) -> None: """Test if re_first() returns first matched element""" body = ''