Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delay item class object creation #20

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 35 additions & 25 deletions itemloaders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"""
from contextlib import suppress

from itemadapter import ItemAdapter
from itemadapter import get_field_meta_from_class, ItemAdapter
from parsel.utils import extract_regex, flatten

from itemloaders.common import wrap_loader_context
Expand All @@ -24,6 +24,18 @@ def unbound_method(method):
return method


class _Context(dict):
    """Loader context whose ``'item'`` entry is resolved lazily.

    The mapping behaves like a plain ``dict`` except that, while the value
    stored under ``'item'`` is ``None``, reading that key asks the owning
    item loader for its ``item`` attribute and caches the result in the
    mapping.
    """

    def __init__(self, item_loader, *args, **kwargs):
        """Build the context.

        :param item_loader: object whose ``item`` attribute supplies the item
            the first time ``'item'`` is read while its stored value is
            ``None``.
        :param args: positional arguments forwarded to ``dict``.
        :param kwargs: keyword arguments forwarded to ``dict``.
        """
        super().__init__(*args, **kwargs)
        self._item_loader = item_loader

    def __getitem__(self, key):
        """Return the value for *key*, resolving a ``None`` ``'item'``.

        :raises KeyError: if *key* is not present in the mapping.
        """
        value = super().__getitem__(key)
        if key == 'item' and value is None:
            # Cache the resolved item so later reads skip the loader.
            value = self[key] = self._item_loader.item
        return value

    def get(self, key, default=None):
        """Return the value for *key* if present, else *default*.

        ``dict.get`` does not go through ``__getitem__``, so without this
        override ``context.get('item')`` would return the ``None``
        placeholder instead of the lazily resolved item.
        """
        try:
            return self[key]
        except KeyError:
            return default


class ItemLoader:
"""
Return a new Item Loader for populating the given item. If no item is
Expand Down Expand Up @@ -102,17 +114,15 @@ class Product:
def __init__(self, item=None, selector=None, parent=None, **context):
self.selector = selector
context.update(selector=selector)
if item is None:
item = self.default_item_class()
self._local_item = item
self._item = item
context['item'] = item
self.context = context
self.context = _Context(parent or self, **context)
self.parent = parent
self._local_values = {}
# values from initial item
for field_name, value in ItemAdapter(item).items():
self._values.setdefault(field_name, [])
self._values[field_name] += arg_to_iter(value)
if item is not None:
for field_name, value in ItemAdapter(item).items():
self._values.setdefault(field_name, [])
self._values[field_name] += arg_to_iter(value)

@property
def _values(self):
Expand All @@ -126,37 +136,29 @@ def item(self):
if self.parent is not None:
return self.parent.item
else:
return self._local_item
if self._item is None:
self._item = self.default_item_class(**self._values)
return self._item

def nested_xpath(self, xpath, **context):
"""
Create a nested loader with an xpath selector.
The supplied selector is applied relative to selector associated
with this :class:`ItemLoader`. The nested loader shares the item
with the parent :class:`ItemLoader` so calls to :meth:`add_xpath`,
:meth:`add_value`, :meth:`replace_value`, etc. will behave as expected.
Comment on lines -135 to -137
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m not sure whether this should go or stay. “Shares the item” is no longer technically accurate, but maybe we should simply reword it to indicate that load_item() returns the same object regardless of whether it is called on the original item loader or on a nested item loader.

with this :class:`ItemLoader`.
"""
selector = self.selector.xpath(xpath)
context.update(selector=selector)
subloader = self.__class__(
item=self.item, parent=self, **context
)
return subloader
return self.__class__(parent=self, **context)

def nested_css(self, css, **context):
"""
Create a nested loader with a css selector.
The supplied selector is applied relative to selector associated
with this :class:`ItemLoader`. The nested loader shares the item
with the parent :class:`ItemLoader` so calls to :meth:`add_xpath`,
:meth:`add_value`, :meth:`replace_value`, etc. will behave as expected.
with this :class:`ItemLoader`.
"""
selector = self.selector.css(css)
context.update(selector=selector)
subloader = self.__class__(
item=self.item, parent=self, **context
)
return subloader
return self.__class__(parent=self, **context)

def add_value(self, field_name, value, *processors, **kw):
"""
Expand Down Expand Up @@ -305,7 +307,15 @@ def get_output_processor(self, field_name):
return unbound_method(proc)

def _get_item_field_attr(self, field_name, key, default=None):
    """Return the *key* entry of the metadata declared for *field_name*.

    The metadata is read from the item object when one already exists and
    from the item class otherwise, so this lookup never instantiates the
    item.

    :param field_name: name of the item field to inspect.
    :param key: metadata key to look up for that field.
    :param default: value returned when the field metadata has no *key*.
    """
    # The ``item`` property of a loader with a parent defers to that parent,
    # so the loader without a parent is the one holding (or eventually
    # creating) the item. Walk up to it instead of checking only one level.
    root = self
    while root.parent is not None:
        root = root.parent
    if root._item is not None:
        field_meta = ItemAdapter(root._item).get_field_meta(field_name)
    else:
        field_meta = get_field_meta_from_class(
            root.default_item_class, field_name
        )
    return field_meta.get(key, default)

def _process_input_value(self, field_name, value):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
'w3lib>=1.17.0',
'parsel>=1.5.0',
'jmespath>=0.9.5',
'itemadapter>=0.1.0',
'itemadapter>=0.1.1',
],
# extras_require=extras_require,
)
61 changes: 61 additions & 0 deletions tests/test_delayed_object_creation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from collections.abc import MutableMapping
from unittest import TestCase

from parsel import Selector

from itemloaders import ItemLoader


# Exception type raised by UninitializableItem.__init__; the tests in this
# module expect it whenever the item object is actually instantiated.
EXPECTED_ERROR = RuntimeError


class UninitializableItem(dict):
    """Dict-based item whose constructor always raises ``EXPECTED_ERROR``."""

    def __init__(self, *args, **kwargs):
        # Arguments are accepted but ignored; instantiation always fails.
        raise EXPECTED_ERROR()


class UninitializableItemLoader(ItemLoader):
    """Item loader whose default item class raises when instantiated."""

    default_item_class = UninitializableItem


class DelayedObjectCreationTestCase(TestCase):
    """Check when the loader instantiates its item class.

    The loader under test uses an item class whose constructor raises
    ``EXPECTED_ERROR``, so any step that builds the item fails with it.
    """

    def test_loader_creation(self):
        """Constructing a loader must not raise."""
        UninitializableItemLoader()

    def test_add(self):
        """Adding values directly, by CSS and by XPath must not raise."""
        document = Selector(text="<html><body></body></html>")
        loader = UninitializableItemLoader(selector=document)
        loader.add_value('key', 'value')
        loader.add_css('key', 'html')
        loader.add_xpath('key', '//html')

    def test_context(self):
        """Reading ``'item'`` from the loader context raises."""
        loader = UninitializableItemLoader()
        loader_context = loader.context
        with self.assertRaises(EXPECTED_ERROR):
            loader_context['item']

    def test_load_item(self):
        """Calling ``load_item()`` raises."""
        loader = UninitializableItemLoader()
        with self.assertRaises(EXPECTED_ERROR):
            loader.load_item()

    def test_nested_loader_creation(self):
        """Creating nested loaders by CSS and by XPath must not raise."""
        document = Selector(text="<html><body></body></html>")
        loader = UninitializableItemLoader(selector=document)
        loader.nested_css('html')
        loader.nested_xpath('//html')

    def test_nested_load_item(self):
        """Calling ``load_item()`` on a nested loader raises."""
        document = Selector(text="<html><body></body></html>")
        loader = UninitializableItemLoader(selector=document)

        # Each nested loader is created right before it is exercised.
        nested_factories = (
            (loader.nested_css, 'html'),
            (loader.nested_xpath, '//html'),
        )
        for make_nested, query in nested_factories:
            nested = make_nested(query)
            with self.assertRaises(EXPECTED_ERROR):
                nested.load_item()
23 changes: 23 additions & 0 deletions tests/test_nested_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,26 @@ def test_nested_load_item(self):
self.assertEqual(item['name'], ['marta'])
self.assertEqual(item['url'], ['http://www.scrapy.org'])
self.assertEqual(item['image'], ['/images/logo.png'])

def test_nested_from_item(self):
    """Check that nested loaders work as usual when the top-level loader
    is created from an existing item"""
    item = {'foo': 'bar'}
    loader = ItemLoader(selector=self.selector, item=item)
    nl1 = loader.nested_xpath('//footer')
    nl2 = nl1.nested_xpath('img')

    loader.add_xpath('name', '//header/div/text()')
    nl1.add_xpath('url', 'a/@href')
    nl2.add_xpath('image', '@src')

    item = loader.load_item()

    # Every loader in the chain must expose the object load_item() returned.
    # assertIs keeps these checks active under ``python -O`` and matches the
    # unittest-style assertions used below.
    self.assertIs(item, loader.item)
    self.assertIs(item, nl1.item)
    self.assertIs(item, nl2.item)

    self.assertEqual(item['foo'], ['bar'])
    self.assertEqual(item['name'], ['marta'])
    self.assertEqual(item['url'], ['http://www.scrapy.org'])
    self.assertEqual(item['image'], ['/images/logo.png'])
23 changes: 19 additions & 4 deletions tests/test_output_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,13 @@
from itemloaders.processors import Identity, Compose, TakeFirst


class TestOutputProcessorDict(unittest.TestCase):
def test_output_processor(self):
def take_first(value):
    """Return the element at index 0 of *value*."""
    first = value[0]
    return first


class TestOutputProcessor(unittest.TestCase):

def test_item_class(self):

class TempDict(dict):
def __init__(self, *args, **kwargs):
Expand All @@ -22,9 +27,8 @@ class TempLoader(ItemLoader):
self.assertIsInstance(item, TempDict)
self.assertEqual(dict(item), {'temp': 0.3})

def test_item_object(self):

class TestOutputProcessorItem(unittest.TestCase):
def test_output_processor(self):
class TempLoader(ItemLoader):
default_input_processor = Identity()
default_output_processor = Compose(TakeFirst())
Expand All @@ -35,3 +39,14 @@ class TempLoader(ItemLoader):
item = loader.load_item()
self.assertIsInstance(item, dict)
self.assertEqual(dict(item), {'temp': 0.3})

def test_unbound_processor(self):
    """A plain function (one taking no `self` parameter) used as the
    default output processor must not break loading"""

    class PlainFunctionLoader(ItemLoader):
        default_output_processor = take_first

    loader = PlainFunctionLoader()
    loader.add_value('foo', 'bar')
    loaded = loader.load_item()
    self.assertEqual(loaded, {'foo': 'bar'})
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ deps =

commands =
py.test \
--cov-report=term --cov-report=html --cov-report= --cov=itemloaders \
--cov-report=term --cov-report=html --cov-report=term-missing --cov=itemloaders \
--doctest-modules \
{posargs:itemloaders tests}

Expand Down