PASTIS dataset #315
Merged

Changes shown below are from 39 of the PR's 44 commits:
0dac108  draft (isaaccorley)
ef46dd3  add dataset to __init__ (isaaccorley)
e9935c3  reorganize datasets and datamodules (isaaccorley)
ebb4252  fix mypy errors (isaaccorley)
7718690  draft (isaaccorley)
7d3ecb1  add dataset to __init__ (isaaccorley)
3a0ce52  reorganize datasets and datamodules (isaaccorley)
028d2e1  fix mypy errors (isaaccorley)
2bff295  Merge branch 'datasets/pastis-r' of github.com:isaaccorley/torchgeo i… (isaaccorley)
f54073e  refactor (isaaccorley)
0c2e635  Merge branch 'main' into datasets/pastis-r (isaaccorley)
a751e5c  Merge branch 'main' into datasets/pastis-r (isaaccorley)
472b914  Merge branch 'main' into datasets/pastis-r (isaaccorley)
f97fb16  Merge branch 'main' into datasets/pastis-r (isaaccorley)
f613c6b  Merge branch 'main' into datasets/pastis-r (isaaccorley)
99c277e  Adding docs (calebrob6)
7ada35e  Merge branch 'main' into datasets/pastis-r (calebrob6)
7116785  Adding plotting, cleaning up some stuff (calebrob6)
42c3217  Black and isort (calebrob6)
09b58b5  Fix the datamodule import (calebrob6)
20c5932  Pyupgrade (calebrob6)
60f33fe  Fixing some docstrings (calebrob6)
ee50b0e  Flake8 (calebrob6)
c71074b  Isort (calebrob6)
012f53b  Fix docstrings in datamodules (calebrob6)
b4dbdf6  Fixing fns and docstring (calebrob6)
cacfc62  Trying to fix the docs (calebrob6)
2924fbe  Trying to fix docs (calebrob6)
c02ab27  Adding tests (calebrob6)
c7923c3  Black (calebrob6)
2695665  newline (calebrob6)
24047d3  Made the test dataset larger (calebrob6)
92381b3  Remove the datamodules (calebrob6)
a4c3294  Update docs/api/non_geo_datasets.csv (calebrob6)
c4b51a7  Update torchgeo/datasets/pastis.py (calebrob6)
0312f6e  Update torchgeo/datasets/pastis.py (calebrob6)
c4df0b7  Update torchgeo/datasets/pastis.py (calebrob6)
0c6b51c  Updating cmap (calebrob6)
31e7851  Describe the different band combinations (calebrob6)
85f98e5  Merging datasets (calebrob6)
5cd1efb  Handle the instance segmentation case in plotting (calebrob6)
7dfb4ce  Update torchgeo/datasets/pastis.py (calebrob6)
09b7ac4  Made some code prettier (calebrob6)
5c3c42c  Adding instance plotting (calebrob6)
Binary file not shown.
@@ -0,0 +1,91 @@
#!/usr/bin/env python3

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import hashlib
import os
import shutil
from typing import Union

import fiona
import numpy as np

SIZE = 32
NUM_SAMPLES = 5
MAX_NUM_TIME_STEPS = 10
np.random.seed(0)

FILENAME_HIERARCHY = Union[dict[str, "FILENAME_HIERARCHY"], list[str]]

filenames: FILENAME_HIERARCHY = {
    "DATA_S2": ["S2"],
    "DATA_S1A": ["S1A"],
    "DATA_S1D": ["S1D"],
    "ANNOTATIONS": ["TARGET"],
    "INSTANCE_ANNOTATIONS": ["INSTANCES"],
}


def create_file(path: str) -> None:
    for i in range(NUM_SAMPLES):
        new_path = f"{path}_{i}.npy"
        fn = os.path.basename(new_path)
        t = np.random.randint(1, MAX_NUM_TIME_STEPS)
        if fn.startswith("S2"):
            data = np.random.randint(0, 256, size=(t, 10, SIZE, SIZE)).astype(np.int16)
        elif fn.startswith("S1A"):
            data = np.random.randint(0, 256, size=(t, 3, SIZE, SIZE)).astype(np.float16)
        elif fn.startswith("S1D"):
            data = np.random.randint(0, 256, size=(t, 3, SIZE, SIZE)).astype(np.float16)
        elif fn.startswith("TARGET"):
            data = np.random.randint(0, 20, size=(3, SIZE, SIZE)).astype(np.uint8)
        elif fn.startswith("INSTANCES"):
            data = np.random.randint(0, 100, size=(SIZE, SIZE)).astype(np.int64)
        np.save(new_path, data)


def create_directory(directory: str, hierarchy: FILENAME_HIERARCHY) -> None:
    if isinstance(hierarchy, dict):
        # Recursive case
        for key, value in hierarchy.items():
            path = os.path.join(directory, key)
            os.makedirs(path, exist_ok=True)
            create_directory(path, value)
    else:
        # Base case
        for value in hierarchy:
            path = os.path.join(directory, value)
            create_file(path)


if __name__ == "__main__":
    create_directory("PASTIS-R", filenames)

    schema = {"geometry": "Polygon", "properties": {"Fold": "int", "ID_PATCH": "int"}}
    with fiona.open(
        os.path.join("PASTIS-R", "metadata.geojson"),
        "w",
        "GeoJSON",
        crs="EPSG:4326",
        schema=schema,
    ) as f:
        for i in range(NUM_SAMPLES):
            f.write(
                {
                    "geometry": {
                        "type": "Polygon",
                        "coordinates": [[[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]],
                    },
                    "id": str(i),
                    "properties": {"Fold": i % 5, "ID_PATCH": i},
                }
            )

    filename = "PASTIS-R.zip"
    shutil.make_archive(filename.replace(".zip", ""), "zip", ".", "PASTIS-R")

    # Compute checksums
    with open(filename, "rb") as f:
        md5 = hashlib.md5(f.read()).hexdigest()
        print(f"{filename}: {md5}")
@@ -0,0 +1,154 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os
import shutil
from pathlib import Path

import matplotlib.pyplot as plt
import pytest
import torch
import torch.nn as nn
from _pytest.fixtures import SubRequest
from pytest import MonkeyPatch
from torch.utils.data import ConcatDataset

import torchgeo.datasets.utils
from torchgeo.datasets import (
    PASTIS,
    PASTISInstanceSegmentation,
    PASTISSemanticSegmentation,
)


def download_url(url: str, root: str, *args: str, **kwargs: str) -> None:
    shutil.copy(url, root)


class TestPASTIS:
    @pytest.fixture
    def dataset(self, monkeypatch: MonkeyPatch, tmp_path: Path) -> PASTIS:
        monkeypatch.setattr(torchgeo.datasets.pastis, "download_url", download_url)

        md5 = "9b11ae132623a0d13f7f0775d2003703"
        monkeypatch.setattr(PASTIS, "md5", md5)
        url = os.path.join("tests", "data", "pastis", "PASTIS-R.zip")
        monkeypatch.setattr(PASTIS, "url", url)
        root = str(tmp_path)
        transforms = nn.Identity()
        return PASTIS(root, (0, 1), "s2", transforms, download=True, checksum=True)

    def test_getitem_not_implemented(self, dataset: PASTIS) -> None:
        with pytest.raises(NotImplementedError):
            dataset[0]

    def test_load_target_not_implemented(self, dataset: PASTIS) -> None:
        with pytest.raises(NotImplementedError):
            dataset._load_target(0)


class TestPASTISSemanticSegmentation:
    @pytest.fixture(
        params=[
            {"folds": (0, 1), "bands": "s2"},
            {"folds": (0, 1), "bands": "s1a"},
            {"folds": (0, 1), "bands": "s1d"},
        ]
    )
    def dataset(
        self, monkeypatch: MonkeyPatch, tmp_path: Path, request: SubRequest
    ) -> PASTISSemanticSegmentation:
        monkeypatch.setattr(torchgeo.datasets.pastis, "download_url", download_url)

        md5 = "2084aaa69ec55da5ddb0be69e1e941fe"
        monkeypatch.setattr(PASTIS, "md5", md5)
        url = os.path.join("tests", "data", "pastis", "PASTIS-R.zip")
        monkeypatch.setattr(PASTIS, "url", url)
        root = str(tmp_path)
        folds = request.param["folds"]
        bands = request.param["bands"]
        transforms = nn.Identity()
        return PASTISSemanticSegmentation(
            root, folds, bands, transforms, download=True, checksum=True
        )

    def test_getitem(self, dataset: PASTISSemanticSegmentation) -> None:
        x = dataset[0]
        assert isinstance(x, dict)
        assert isinstance(x["image"], torch.Tensor)
        assert isinstance(x["mask"], torch.Tensor)

    def test_len(self, dataset: PASTISSemanticSegmentation) -> None:
        assert len(dataset) == 2

    def test_add(self, dataset: PASTISSemanticSegmentation) -> None:
        ds = dataset + dataset
        assert isinstance(ds, ConcatDataset)
        assert len(ds) == 4

    def test_already_extracted(self, dataset: PASTISSemanticSegmentation) -> None:
        PASTISSemanticSegmentation(root=dataset.root, download=True)

    def test_already_downloaded(self, tmp_path: Path) -> None:
        url = os.path.join("tests", "data", "pastis", "PASTIS-R.zip")
        root = str(tmp_path)
        shutil.copy(url, root)
        PASTISSemanticSegmentation(root)

    def test_not_downloaded(self, tmp_path: Path) -> None:
        with pytest.raises(RuntimeError, match="Dataset not found"):
            PASTISSemanticSegmentation(str(tmp_path))

    def test_corrupted(self, tmp_path: Path) -> None:
        with open(os.path.join(tmp_path, "PASTIS-R.zip"), "w") as f:
            f.write("bad")
        with pytest.raises(RuntimeError, match="Dataset found, but corrupted."):
            PASTISSemanticSegmentation(root=str(tmp_path), checksum=True)

    def test_invalid_fold(self) -> None:
        with pytest.raises(AssertionError):
            PASTISSemanticSegmentation(folds=(6,))

    def test_plot(self, dataset: PASTISSemanticSegmentation) -> None:
        x = dataset[0].copy()
        dataset.plot(x, suptitle="Test")
        plt.close()
        dataset.plot(x, show_titles=False)
        plt.close()
        x["prediction"] = x["mask"].clone()
        dataset.plot(x)
        plt.close()


class TestPASTISInstanceSegmentation:
    @pytest.fixture(
        params=[
            {"folds": (0, 1), "bands": "s2"},
            {"folds": (0, 1), "bands": "s1a"},
            {"folds": (0, 1), "bands": "s1d"},
        ]
    )
    def dataset(
        self, monkeypatch: MonkeyPatch, tmp_path: Path, request: SubRequest
    ) -> PASTISInstanceSegmentation:
        monkeypatch.setattr(torchgeo.datasets.pastis, "download_url", download_url)

        md5 = "9b11ae132623a0d13f7f0775d2003703"
        monkeypatch.setattr(PASTIS, "md5", md5)
        url = os.path.join("tests", "data", "pastis", "PASTIS-R.zip")
        monkeypatch.setattr(PASTIS, "url", url)
        root = str(tmp_path)
        folds = request.param["folds"]
        bands = request.param["bands"]
        transforms = nn.Identity()
        return PASTISInstanceSegmentation(
            root, folds, bands, transforms, download=True, checksum=True
        )

    def test_getitem(self, dataset: PASTISInstanceSegmentation) -> None:
        x = dataset[0]
        assert isinstance(x, dict)
        assert isinstance(x["image"], torch.Tensor)
        assert isinstance(x["mask"], torch.Tensor)
        assert isinstance(x["boxes"], torch.Tensor)
        assert isinstance(x["label"], torch.Tensor)
Review comment: If we do eventually add a datamodule (I see the PR), then we'll also want to include the uncompressed files so that the tests don't extract the zip file and create untracked git files.
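A hedged sketch of what that suggestion could look like: if the uncompressed PASTIS-R/ directory produced by the generator script is committed next to PASTIS-R.zip, a future datamodule (or its tests) could point root straight at the fixture directory, so the dataset finds the already-extracted files and never unpacks the archive into the working tree. The folds/bands keyword names below mirror the fixtures above; treat the exact call as illustrative.

import os

from torchgeo.datasets import PASTISSemanticSegmentation

# Uses the already-extracted test fixture, so nothing is downloaded or
# extracted and no untracked files appear under tests/data/pastis.
root = os.path.join("tests", "data", "pastis")
ds = PASTISSemanticSegmentation(root, folds=(0, 1), bands="s2")
print(len(ds))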