Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ PySTACItemReaderIterDataPipe for reading STAC Items #46

Merged
merged 6 commits into from
Sep 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ sphinx:
pyogrio:
- 'https://pyogrio.readthedocs.io/en/latest/'
- null
pystac:
- 'https://pystac.readthedocs.io/en/latest/'
- null
python:
- 'https://docs.python.org/3/'
- null
Expand Down
9 changes: 9 additions & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@
:show-inheritance:
```

### PySTAC

```{eval-rst}
.. automodule:: zen3geo.datapipes.pystac
.. autoclass:: zen3geo.datapipes.PySTACItemReader
.. autoclass:: zen3geo.datapipes.pystac.PySTACItemReaderIterDataPipe
:show-inheritance:
```

### Rioxarray

```{eval-rst}
Expand Down
4 changes: 2 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ docs = [
raster = ["xbatcher"]
spatial = [
"datashader",
"pystac",
"spatialpandas"
]
vector = ["pyogrio"]
Expand Down
1 change: 1 addition & 0 deletions zen3geo/datapipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@
GeoPandasRectangleClipperIterDataPipe as GeoPandasRectangleClipper,
)
from zen3geo.datapipes.pyogrio import PyogrioReaderIterDataPipe as PyogrioReader
from zen3geo.datapipes.pystac import PySTACItemReaderIterDataPipe as PySTACItemReader
from zen3geo.datapipes.rioxarray import RioXarrayReaderIterDataPipe as RioXarrayReader
from zen3geo.datapipes.xbatcher import XbatcherSlicerIterDataPipe as XbatcherSlicer
95 changes: 95 additions & 0 deletions zen3geo/datapipes/pystac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""
DataPipes for :doc:`pystac <pystac:index>`.
"""
from typing import Any, Dict, Iterator, Optional

try:
import pystac
except ImportError:
pystac = None
from torchdata.datapipes import functional_datapipe
from torchdata.datapipes.iter import IterDataPipe


@functional_datapipe("read_to_pystac_item")
class PySTACItemReaderIterDataPipe(IterDataPipe):
    """
    Takes files from local disk or URLs (as long as they can be read by pystac)
    and yields :py:class:`pystac.Item` objects (functional name:
    ``read_to_pystac_item``).

    Parameters
    ----------
    source_datapipe : IterDataPipe[str]
        A DataPipe that contains filepaths or URL links to STAC items.

    kwargs : Optional
        Extra keyword arguments to pass to :py:meth:`pystac.Item.from_file`.

    Yields
    ------
    stac_item : pystac.Item
        A :py:class:`pystac.Item` object created by
        :py:meth:`pystac.Item.from_file` for each filepath or URL coming out
        of ``source_datapipe``.

    Raises
    ------
    ModuleNotFoundError
        If ``pystac`` is not installed. See
        :doc:`install instructions for pystac <pystac:installation>`, (e.g. via
        ``pip install pystac``) before using this class.

    Example
    -------
    >>> import pytest
    >>> pystac = pytest.importorskip("pystac")
    ...
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> from zen3geo.datapipes import PySTACItemReader
    ...
    >>> # Read in STAC Item using DataPipe
    >>> item_url: str = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-2-l2a/items/S2A_MSIL2A_20220115T032101_R118_T48NUG_20220115T170435"
    >>> dp = IterableWrapper(iterable=[item_url])
    >>> dp_pystac = dp.read_to_pystac_item()
    ...
    >>> # Loop or iterate over the DataPipe stream
    >>> it = iter(dp_pystac)
    >>> stac_item = next(it)
    >>> stac_item.bbox
    [103.20205689, 0.81602476, 104.18934086, 1.8096362]
    >>> stac_item.properties  # doctest: +NORMALIZE_WHITESPACE
    {'datetime': '2022-01-15T03:21:01.024000Z',
     'platform': 'Sentinel-2A',
     'proj:epsg': 32648,
     'instruments': ['msi'],
     's2:mgrs_tile': '48NUG',
     'constellation': 'Sentinel 2',
     's2:granule_id': 'S2A_OPER_MSI_L2A_TL_ESRI_20220115T170436_A034292_T48NUG_N03.00',
     'eo:cloud_cover': 17.352597,
     's2:datatake_id': 'GS2A_20220115T032101_034292_N03.00',
     's2:product_uri': 'S2A_MSIL2A_20220115T032101_N0300_R118_T48NUG_20220115T170435.SAFE',
     's2:datastrip_id': 'S2A_OPER_MSI_L2A_DS_ESRI_20220115T170436_S20220115T033502_N03.00',
     's2:product_type': 'S2MSI2A',
     'sat:orbit_state': 'descending',
    ...
    """

    # A ``**kwargs`` annotation describes each individual keyword value, not
    # the collected dict, so ``Any`` is the accurate type here.
    def __init__(self, source_datapipe: IterDataPipe[str], **kwargs: Any) -> None:
        # ``pystac`` is an optional dependency; the module-level import falls
        # back to ``None`` when it is missing, so fail early with install help.
        if pystac is None:
            raise ModuleNotFoundError(
                "Package `pystac` is required to be installed to use this datapipe. "
                "Please use `pip install pystac` or "
                "`conda install -c conda-forge pystac` "
                "to install the package"
            )
        self.source_datapipe: IterDataPipe[str] = source_datapipe
        # Stored as-is and forwarded to ``pystac.Item.from_file`` on each item.
        self.kwargs = kwargs

    def __iter__(self) -> Iterator:
        # Each upstream element is used as the ``href`` of one STAC Item.
        for href in self.source_datapipe:
            yield pystac.Item.from_file(href=href, **self.kwargs)

    def __len__(self) -> int:
        # One output per input, so the length is that of the source datapipe.
        return len(self.source_datapipe)
47 changes: 47 additions & 0 deletions zen3geo/tests/test_datapipes_pystac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
Tests for pystac datapipes.
"""
import pytest
from torchdata.datapipes.iter import IterableWrapper

from zen3geo.datapipes import PySTACItemReader

pystac = pytest.importorskip("pystac")

# %%
def test_pystac_item_reader():
    """
    Ensure that PySTACItemReader works to read in a JSON STAC item and outputs
    to a pystac.Item object, for both the class constructor and the functional
    form of the datapipe.
    """
    item_url: str = "https://github.com/stac-utils/pystac/raw/v1.6.1/tests/data-files/item/sample-item.json"
    dp = IterableWrapper(iterable=[item_url])

    # Using class constructors
    dp_pystac_class = PySTACItemReader(source_datapipe=dp)
    # Using functional form (recommended)
    dp_pystac_func = dp.read_to_pystac_item()

    # Run the same checks on both datapipes so that neither way of building
    # the reader is left unverified.
    for dp_pystac in (dp_pystac_class, dp_pystac_func):
        assert len(dp_pystac) == 1
        it = iter(dp_pystac)
        stac_item = next(it)

        assert stac_item.bbox == [
            -122.59750209,
            37.48803556,
            -122.2880486,
            37.613537207,
        ]
        assert stac_item.datetime.isoformat() == "2016-05-03T13:22:30.040000+00:00"
        assert stac_item.geometry["type"] == "Polygon"
        assert stac_item.properties == {
            "datetime": "2016-05-03T13:22:30.040000Z",
            "title": "A CS3 item",
            "license": "PDDL-1.0",
            "providers": [
                {
                    "name": "CoolSat",
                    "roles": ["producer", "licensor"],
                    "url": "https://cool-sat.com/",
                }
            ],
        }
        assert (
            stac_item.assets["analytic"].extra_fields["product"]
            == "http://cool-sat.com/catalog/products/analytic.json"
        )