Skip to content

Commit

Permalink
Implement utils.get_field_meta_from_class (#35)
Browse files Browse the repository at this point in the history
  • Loading branch information
elacuesta authored Sep 16, 2020
1 parent ecbb76d commit 4685de9
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 38 deletions.
16 changes: 13 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,13 +163,23 @@ _`itemadapter.utils.is_item(obj: Any) -> bool`_
Return `True` if the given object belongs to one of the supported types,
`False` otherwise.

### `get_field_meta_from_class` function

_`itemadapter.utils.get_field_meta_from_class(item_class: type, field_name: str) -> MappingProxyType`_

Given an item class and a field name, return a
[`MappingProxyType`](https://docs.python.org/3/library/types.html#types.MappingProxyType)
object, which is a read-only mapping with metadata about the given field. If the item class does not
support field metadata, or there is no metadata for the given field, an empty object is returned.


## Metadata support

`scrapy.item.Item`, `dataclass` and `attrs` objects allow the inclusion of
arbitrary field metadata, which can be retrieved with the
`ItemAdapter.get_field_meta` method. The definition procedure depends on the
underlying type.
arbitrary field metadata. This can be retrieved from an item instance with the
`itemadapter.adapter.ItemAdapter.get_field_meta` method, or from an item class
with the `itemadapter.utils.get_field_meta_from_class` function.
The definition procedure depends on the underlying type.

#### `scrapy.item.Item` objects

Expand Down
2 changes: 1 addition & 1 deletion itemadapter/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .adapter import ItemAdapter # noqa: F401
from .utils import is_item # noqa: F401
from .utils import get_field_meta_from_class, is_item # noqa: F401


__version__ = "0.1.0"
31 changes: 8 additions & 23 deletions itemadapter/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@
from types import MappingProxyType
from typing import Any, Iterator

from .utils import is_item, is_attrs_instance, is_dataclass_instance, is_scrapy_item
from .utils import (
get_field_meta_from_class,
is_attrs_instance,
is_dataclass_instance,
is_item,
is_scrapy_item,
)


class ItemAdapter(MutableMapping):
Expand Down Expand Up @@ -86,28 +92,7 @@ def get_field_meta(self, field_name: str) -> MappingProxyType:
The returned value is an instance of types.MappingProxyType, i.e. a dynamic read-only view
of the original mapping, which gets automatically updated if the original mapping changes.
"""
if is_scrapy_item(self.item):
return MappingProxyType(self.item.fields[field_name])
elif is_dataclass_instance(self.item):
from dataclasses import fields

for field in fields(self.item):
if field.name == field_name:
return field.metadata # type: ignore
raise KeyError(
"%s does not support field: %s" % (self.item.__class__.__name__, field_name)
)
elif is_attrs_instance(self.item):
from attr import fields_dict

try:
return fields_dict(self.item.__class__)[field_name].metadata # type: ignore
except KeyError:
raise KeyError(
"%s does not support field: %s" % (self.item.__class__.__name__, field_name)
)
else:
return MappingProxyType({})
return get_field_meta_from_class(self.item.__class__, field_name)

def field_names(self) -> KeysView:
"""
Expand Down
78 changes: 68 additions & 10 deletions itemadapter/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,36 @@
from types import MappingProxyType
from typing import Any


def _get_scrapy_item_classes() -> tuple:
try:
import scrapy
except ImportError:
return ()
else:
try:
_base_item_cls = getattr(scrapy.item, "_BaseItem", scrapy.item.BaseItem) # deprecated
return (scrapy.item.Item, _base_item_cls)
except AttributeError:
return (scrapy.item.Item,)


def _is_dataclass(obj: Any) -> bool:
try:
import dataclasses
except ImportError:
return False
return dataclasses.is_dataclass(obj)


def _is_attrs_class(obj: Any) -> bool:
try:
import attr
except ImportError:
return False
return attr.has(obj)


def is_dataclass_instance(obj: Any) -> bool:
"""
Return True if the given object is a dataclass object, False otherwise.
Expand All @@ -9,22 +39,14 @@ def is_dataclass_instance(obj: Any) -> bool:
Taken from https://docs.python.org/3/library/dataclasses.html#dataclasses.is_dataclass.
"""
try:
import dataclasses
except ImportError:
return False
return dataclasses.is_dataclass(obj) and not isinstance(obj, type)
return _is_dataclass(obj) and not isinstance(obj, type)


def is_attrs_instance(obj: Any) -> bool:
"""
Return True if the given object is a attrs-based object, False otherwise.
"""
try:
import attr
except ImportError:
return False
return attr.has(obj) and not isinstance(obj, type)
return _is_attrs_class(obj) and not isinstance(obj, type)


def is_scrapy_item(obj: Any) -> bool:
Expand Down Expand Up @@ -55,3 +77,39 @@ def is_item(obj: Any) -> bool:
or is_dataclass_instance(obj)
or is_attrs_instance(obj)
)


def get_field_meta_from_class(item_class: type, field_name: str) -> MappingProxyType:
    """
    Return a read-only mapping with metadata for the given field name, within the given item class.
    If there is no metadata for the field, or the item class does not support field metadata,
    an empty object is returned.

    Field metadata is taken from different sources, depending on the item type:
    * scrapy.item.Item: corresponding scrapy.item.Field object
    * dataclass items: "metadata" attribute for the corresponding field
    * attrs items: "metadata" attribute for the corresponding field

    The returned value is an instance of types.MappingProxyType, i.e. a dynamic read-only view
    of the original mapping, which gets automatically updated if the original mapping changes.
    """
    if issubclass(item_class, _get_scrapy_item_classes()):
        # scrapy.item.Field objects are dict subclasses: expose a read-only view
        return MappingProxyType(item_class.fields[field_name])  # type: ignore
    if _is_dataclass(item_class):
        from dataclasses import fields

        matching = [f for f in fields(item_class) if f.name == field_name]
        if not matching:
            raise KeyError("%s does not support field: %s" % (item_class.__name__, field_name))
        return matching[0].metadata  # type: ignore
    if _is_attrs_class(item_class):
        from attr import fields_dict

        try:
            return fields_dict(item_class)[field_name].metadata  # type: ignore
        except KeyError:
            raise KeyError("%s does not support field: %s" % (item_class.__name__, field_name))
    if issubclass(item_class, dict):
        # plain dicts carry no per-field metadata
        return MappingProxyType({})
    raise TypeError("%s is not a valid item class" % (item_class,))
59 changes: 58 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import unittest
from unittest import mock
from types import MappingProxyType

from itemadapter.utils import is_item, is_attrs_instance, is_dataclass_instance, is_scrapy_item
from itemadapter.utils import (
get_field_meta_from_class,
is_attrs_instance,
is_dataclass_instance,
is_item,
is_scrapy_item,
)

from tests import AttrsItem, DataClassItem, ScrapyItem, ScrapySubclassedItem

Expand All @@ -10,6 +17,14 @@ def mocked_import(name, *args, **kwargs):
raise ImportError(name)


class InvalidItemClassTestCase(unittest.TestCase):
    """get_field_meta_from_class must reject objects that are not supported item classes."""

    def test_invalid_item_class(self):
        for obj, label in ((1, "1"), (list, "list")):
            with self.assertRaises(TypeError, msg="%s is not a valid item class" % label):
                get_field_meta_from_class(obj, "field")


class ItemLikeTestCase(unittest.TestCase):
def test_false(self):
self.assertFalse(is_item(int))
Expand Down Expand Up @@ -64,11 +79,20 @@ def test_false(self):
@mock.patch("builtins.__import__", mocked_import)
def test_module_not_available(self):
self.assertFalse(is_attrs_instance(AttrsItem(name="asdf", value=1234)))
with self.assertRaises(TypeError, msg="AttrsItem is not a valid item class"):
get_field_meta_from_class(AttrsItem, "name")

@unittest.skipIf(not AttrsItem, "attrs module is not available")
def test_true(self):
self.assertTrue(is_attrs_instance(AttrsItem()))
self.assertTrue(is_attrs_instance(AttrsItem(name="asdf", value=1234)))
# field metadata
self.assertEqual(
get_field_meta_from_class(AttrsItem, "name"), MappingProxyType({"serializer": str})
)
self.assertEqual(
get_field_meta_from_class(AttrsItem, "value"), MappingProxyType({"serializer": int})
)


class DataclassTestCase(unittest.TestCase):
Expand All @@ -92,11 +116,21 @@ def test_false(self):
@mock.patch("builtins.__import__", mocked_import)
def test_module_not_available(self):
self.assertFalse(is_dataclass_instance(DataClassItem(name="asdf", value=1234)))
with self.assertRaises(TypeError, msg="DataClassItem is not a valid item class"):
get_field_meta_from_class(DataClassItem, "name")

@unittest.skipIf(not DataClassItem, "dataclasses module is not available")
def test_true(self):
self.assertTrue(is_dataclass_instance(DataClassItem()))
self.assertTrue(is_dataclass_instance(DataClassItem(name="asdf", value=1234)))
# field metadata
self.assertEqual(
get_field_meta_from_class(DataClassItem, "name"), MappingProxyType({"serializer": str})
)
self.assertEqual(
get_field_meta_from_class(DataClassItem, "value"),
MappingProxyType({"serializer": int}),
)


class ScrapyItemTestCase(unittest.TestCase):
Expand All @@ -118,12 +152,23 @@ def test_false(self):
@mock.patch("builtins.__import__", mocked_import)
def test_module_not_available(self):
self.assertFalse(is_scrapy_item(ScrapySubclassedItem(name="asdf", value=1234)))
with self.assertRaises(TypeError, msg="ScrapySubclassedItem is not a valid item class"):
get_field_meta_from_class(ScrapySubclassedItem, "name")

@unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available")
def test_true(self):
self.assertTrue(is_scrapy_item(ScrapyItem()))
self.assertTrue(is_scrapy_item(ScrapySubclassedItem()))
self.assertTrue(is_scrapy_item(ScrapySubclassedItem(name="asdf", value=1234)))
# field metadata
self.assertEqual(
get_field_meta_from_class(ScrapySubclassedItem, "name"),
MappingProxyType({"serializer": str}),
)
self.assertEqual(
get_field_meta_from_class(ScrapySubclassedItem, "value"),
MappingProxyType({"serializer": int}),
)


try:
Expand Down Expand Up @@ -161,8 +206,20 @@ class SubClassedBaseItem(scrapy.item.BaseItem):

    @unittest.skipIf(scrapy is None, "scrapy module is not available")
    def test_removed_baseitem(self):
        """
        Mock the scrapy.item module so it does not contain the deprecated _BaseItem class
        """

        # Stand-in for scrapy.item exposing only Item, mimicking a Scrapy
        # release where both BaseItem and _BaseItem have been removed.
        class MockItemModule:
            Item = ScrapyItem

        with mock.patch("scrapy.item", MockItemModule):
            # A plain dict must still not be detected as a Scrapy item
            self.assertFalse(is_scrapy_item(dict()))
            # Field metadata lookup keeps working without the deprecated base classes
            self.assertEqual(
                get_field_meta_from_class(ScrapySubclassedItem, "name"),
                MappingProxyType({"serializer": str}),
            )
            self.assertEqual(
                get_field_meta_from_class(ScrapySubclassedItem, "value"),
                MappingProxyType({"serializer": int}),
            )

0 comments on commit 4685de9

Please sign in to comment.