Skip to content

Commit

Permalink
Issue scrapy#249 - Add strip to get() and getall()
Browse files Browse the repository at this point in the history
  • Loading branch information
Felipe Boff Nunes committed Oct 28, 2022
1 parent 1913fb7 commit fbdb881
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 5 deletions.
18 changes: 14 additions & 4 deletions parsel/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,12 +200,15 @@ def re_first(
return el
return default

def getall(self) -> List[str]:
def getall(self, strip: bool = False) -> List[str]:
"""
Call the ``.get()`` method for each element in this list and return
their results flattened, as a list of strings. If ``strip`` is true,
leading and trailing whitespace is removed from each non-empty result.
"""
return [x.get() for x in self]
data = [x.get() for x in self]
if strip:
return [x.strip() if x else x for x in data]
return data

extract = getall

Expand All @@ -217,13 +220,20 @@ def get(self, default: None = None) -> Optional[str]:
def get(self, default: str) -> str:
pass

def get(self, default: Optional[str] = None) -> Optional[str]:
@typing.overload
def get(self, strip: bool) -> str:
pass

def get(
self, default: Optional[str] = None, strip: Optional[bool] = False
) -> Optional[str]:
"""
Return the result of ``.get()`` for the first element in this list.
If the list is empty, return the default value. If ``strip`` is true,
leading and trailing whitespace is removed from a non-empty result.
"""
for x in self:
return x.get()
value = x.get()
return value.strip() if strip and value else value
return default

extract_first = get
Expand Down
49 changes: 48 additions & 1 deletion tests/test_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def test_simple_selection(self) -> None:
def test_simple_selection_with_variables(self) -> None:
"""Using XPath variables"""
body = "<p><input name='a' value='1'/><input name='b' value='2'/></p>"
sel = self.sscls(text=body)
sel = self.sscls(text=body) # Selector doesn't have default field

self.assertEqual(
[
Expand Down Expand Up @@ -323,6 +323,53 @@ def test_selectorlist_get_alias(self) -> None:
self.assertEqual(sel.xpath("//ul/li").get(), '<li id="1">1</li>')
self.assertEqual(sel.xpath("//ul/li/text()").get(), "1")

def test_selector_get_strip(self) -> None:
"""Test that get(strip=True) strips text results and leaves element markup as is"""
# The second <li> is the only one whose text has surrounding whitespace.
body = '<ul><li id="1">1</li><li id="2"> 2 </li><li id="3">3</li></ul>'
sel = self.sscls(text=body)

# Baseline: without strip, the serialized element is returned unchanged.
self.assertEqual(
sel.xpath("//ul/li[position()>1]").get(), '<li id="2"> 2 </li>'
)
# The serialized element has no outer whitespace, so strip=True changes
# nothing; the whitespace inside the tag is kept.
self.assertEqual(
sel.xpath("//ul/li[position()>1]").get(strip=True),
'<li id="2"> 2 </li>',
)
# Baseline: the text node keeps its surrounding spaces by default.
self.assertEqual(
sel.xpath("//ul/li[position()>1]/text()").get(), " 2 "
)
# With strip=True the surrounding spaces of the text node are removed.
self.assertEqual(
sel.xpath("//ul/li[position()>1]/text()").get(strip=True), "2"
)

def test_selector_getall_strip(self) -> None:
"""Test that getall(strip=True) strips each text result and leaves element markup as is"""
# Covers text with no whitespace ("1"), whitespace on both sides (" 2 ")
# and leading whitespace only (" 3").
body = (
'<ul><li id="1">1</li><li id="2"> 2 </li><li id="3"> 3</li></ul>'
)
sel = self.sscls(text=body)

# Baseline: without strip, every serialized element is returned unchanged.
self.assertEqual(
sel.xpath("//ul/li").getall(),
[
'<li id="1">1</li>',
'<li id="2"> 2 </li>',
'<li id="3"> 3</li>',
],
)
# Serialized elements have no outer whitespace, so strip=True yields the
# same list; whitespace inside the tags is kept.
self.assertEqual(
sel.xpath("//ul/li").getall(strip=True),
[
'<li id="1">1</li>',
'<li id="2"> 2 </li>',
'<li id="3"> 3</li>',
],
)
# Baseline: text nodes keep their surrounding spaces by default.
self.assertEqual(
sel.xpath("//ul/li/text()").getall(), ["1", " 2 ", " 3"]
)
# With strip=True each text node is stripped individually.
self.assertEqual(
sel.xpath("//ul/li/text()").getall(strip=True), ["1", "2", "3"]
)

def test_re_first(self) -> None:
"""Test if re_first() returns first matched element"""
body = '<ul><li id="1">1</li><li id="2">2</li></ul>'
Expand Down

0 comments on commit fbdb881

Please sign in to comment.