From 58687f7664f526e8f404f408ba52a2186bd5989f Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Wed, 29 May 2024 15:35:38 +0100
Subject: [PATCH 1/4] Return labels for FER2013 if possible

---
 torchvision/datasets/fer2013.py | 34 ++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/torchvision/datasets/fer2013.py b/torchvision/datasets/fer2013.py
index 057fe695a13..16e810d49aa 100644
--- a/torchvision/datasets/fer2013.py
+++ b/torchvision/datasets/fer2013.py
@@ -25,6 +25,10 @@ class FER2013(VisionDataset):
     _RESOURCES = {
         "train": ("train.csv", "3f0dfb3d3fd99c811a1299cb947e3131"),
         "test": ("test.csv", "b02c2298636a634e8c2faabbf3ea9a23"),
+        # This one also contains both train and tests instances, and unlike test.csv it contains the labels
+        # for the test instances.
+        # It is used if it exists, otherwise "train" and "test" are used for BC, as support for "icml" was added later.
+        "icml": ("icml_face_data.csv", "b114b9e04e6949e5fe8b6a98b3892b1d"),
     }
 
     def __init__(
@@ -34,11 +38,12 @@ def __init__(
         transform: Optional[Callable] = None,
         target_transform: Optional[Callable] = None,
     ) -> None:
-        self._split = verify_str_arg(split, "split", self._RESOURCES.keys())
+        self._split = verify_str_arg(split, "split", ("train", "test"))
         super().__init__(root, transform=transform, target_transform=target_transform)
 
         base_folder = pathlib.Path(self.root) / "fer2013"
-        file_name, md5 = self._RESOURCES[self._split]
+        use_icml = (base_folder / self._RESOURCES["icml"][0]).exists()
+        file_name, md5 = self._RESOURCES["all" if use_icml else self._split]
         data_file = base_folder / file_name
         if not check_integrity(str(data_file), md5=md5):
             raise RuntimeError(
@@ -47,14 +52,25 @@ def __init__(
                 f"https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge"
             )
 
+        pixels_key = " pixels" if use_icml else "pixels"  # yes, for real
+
+        def get_img(row):
+            return torch.tensor([int(idx) for idx in row[pixels_key].split()], dtype=torch.uint8).reshape(48, 48)
+
+        def get_label(row):
+            if use_icml or self._split == "train":
+                return int(row["emotion"])
+            else:
+                return None
+
         with open(data_file, "r", newline="") as file:
-            self._samples = [
-                (
-                    torch.tensor([int(idx) for idx in row["pixels"].split()], dtype=torch.uint8).reshape(48, 48),
-                    int(row["emotion"]) if "emotion" in row else None,
-                )
-                for row in csv.DictReader(file)
-            ]
+            rows = (row for row in csv.DictReader(file))
+
+            if use_icml:
+                valid_keys = ("Training",) if self._split == "train" else ("PublicTest", "PrivateTest")
+                rows = (row for row in rows if row[" Usage"] in valid_keys)
+
+            self._samples = [(get_img(row), get_label(row)) for row in rows]
 
     def __len__(self) -> int:
         return len(self._samples)

From 8bc0ea2d7b7815f5f27b621440ea41d23e187b11 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Wed, 29 May 2024 15:36:58 +0100
Subject: [PATCH 2/4] Fix _RESOURCES lookup: use the "icml" key instead of "all"

---
 torchvision/datasets/fer2013.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/fer2013.py b/torchvision/datasets/fer2013.py
index 16e810d49aa..c612157051a 100644
--- a/torchvision/datasets/fer2013.py
+++ b/torchvision/datasets/fer2013.py
@@ -43,7 +43,7 @@ def __init__(
 
         base_folder = pathlib.Path(self.root) / "fer2013"
         use_icml = (base_folder / self._RESOURCES["icml"][0]).exists()
-        file_name, md5 = self._RESOURCES["all" if use_icml else self._split]
+        file_name, md5 = self._RESOURCES["icml" if use_icml else self._split]
         data_file = base_folder / file_name
         if not check_integrity(str(data_file), md5=md5):
             raise RuntimeError(

From 33021230143177a23c637abc0ee776efc1f74f47 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Tue, 4 Jun 2024 10:56:31 +0100
Subject: [PATCH 3/4] Add support for fer2013.csv as well and add tests

---
 test/test_datasets.py           | 70 +++++++++++++++++++++++++--------
 torchvision/datasets/fer2013.py | 49 ++++++++++++++++++-----
 2 files changed, 93 insertions(+), 26 deletions(-)

diff --git a/test/test_datasets.py b/test/test_datasets.py
index 38a5fe33e3e..f61149affad 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -2442,27 +2442,65 @@ def inject_fake_data(self, tmpdir, config):
         base_folder = os.path.join(tmpdir, "fer2013")
         os.makedirs(base_folder)
 
+        use_icml = config.pop("use_icml", False)
+        use_fer = config.pop("use_fer", False)
+
         num_samples = 5
-        with open(os.path.join(base_folder, f"{config['split']}.csv"), "w", newline="") as file:
-            writer = csv.DictWriter(
-                file,
-                fieldnames=("emotion", "pixels") if config["split"] == "train" else ("pixels",),
-                quoting=csv.QUOTE_NONNUMERIC,
-                quotechar='"',
-            )
-            writer.writeheader()
-            for _ in range(num_samples):
-                row = dict(
-                    pixels=" ".join(
-                        str(pixel) for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist()
-                    )
+        
+        if use_icml or use_fer:
+            pixels_key, usage_key = (" pixels", " Usage") if use_icml else ("pixels", "Usage")
+            fieldnames = ("emotion", usage_key, pixels_key) if use_icml else ("emotion", pixels_key, usage_key)
+            filename = "icml_face_data.csv" if use_icml else "fer2013.csv"
+            with open(os.path.join(base_folder, filename), "w", newline="") as file:
+                writer = csv.DictWriter(
+                    file,
+                    fieldnames=fieldnames,
+                    quoting=csv.QUOTE_NONNUMERIC,
+                    quotechar='"',
                 )
-                if config["split"] == "train":
-                    row["emotion"] = str(int(torch.randint(0, 7, ())))
+                writer.writeheader()
+                for i in range(num_samples):
+                    row = {
+                        "emotion": str(int(torch.randint(0, 7, ()))),
+                        usage_key: "Training" if i % 2 else "PublicTest",
+                        pixels_key:" ".join(
+                            str(pixel) for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist()
+                        ),
+                    }
+
+                    writer.writerow(row)
+        else:
+            with open(os.path.join(base_folder, f"{config['split']}.csv"), "w", newline="") as file:
+                writer = csv.DictWriter(
+                    file,
+                    fieldnames=("emotion", "pixels") if config["split"] == "train" else ("pixels",),
+                    quoting=csv.QUOTE_NONNUMERIC,
+                    quotechar='"',
+                )
+                writer.writeheader()
+                for _ in range(num_samples):
+                    row = dict(
+                        pixels=" ".join(
+                            str(pixel) for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist()
+                        )
+                    )
+                    if config["split"] == "train":
+                        row["emotion"] = str(int(torch.randint(0, 7, ())))
 
-                writer.writerow(row)
+                    writer.writerow(row)
 
         return num_samples
+    
+    def test_icml_file(self):
+        config = {"split": "test"}
+        with self.create_dataset(config=config) as (dataset, _):
+            assert all(s[1] is None for s in dataset)
+
+        for split in ("train", "test"):
+            for d in ({"use_icml": True}, {"use_fer": True}):
+                config = {"split": split, **d}
+                with self.create_dataset(config=config) as (dataset, _):
+                    assert all(s[1] is not None for s in dataset)
 
 
 class GTSRBTestCase(datasets_utils.ImageDatasetTestCase):
diff --git a/torchvision/datasets/fer2013.py b/torchvision/datasets/fer2013.py
index c612157051a..c63b60d09c4 100644
--- a/torchvision/datasets/fer2013.py
+++ b/torchvision/datasets/fer2013.py
@@ -13,9 +13,21 @@ class FER2013(VisionDataset):
     """`FER2013
     <https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge>`_ Dataset.
 
+    .. note::
+        This dataset can return test labels only if ``fer2013.csv`` OR
+        ``icml_face_data.csv`` is present in ``root/fer2013/``. If only
+        ``train.csv`` and ``test.csv`` are present, the test labels are set to
+        ``None``.
+
     Args:
         root (str or ``pathlib.Path``): Root directory of dataset where directory
-            ``root/fer2013`` exists.
+            ``root/fer2013`` exists. This directory may contain either
+            ``fer2013.csv``, ``icml_face_data.csv``, or both ``train.csv`` and
+            ``test.csv``. Precedence is given in that order, i.e. if
+            ``fer2013.csv`` is present then the rest of the files will be
+            ignored. All these (combinations of) files contain the same data and
+            are supported for convenience, but only ``fer2013.csv`` and
+            ``icml_face_data.csv`` are able to return non-None test labels.
         split (string, optional): The dataset split, supports ``"train"`` (default), or ``"test"``.
         transform (callable, optional): A function/transform that takes in a PIL image and returns a transformed
             version. E.g, ``transforms.RandomCrop``
@@ -25,9 +37,24 @@ class FER2013(VisionDataset):
     _RESOURCES = {
         "train": ("train.csv", "3f0dfb3d3fd99c811a1299cb947e3131"),
         "test": ("test.csv", "b02c2298636a634e8c2faabbf3ea9a23"),
-        # This one also contains both train and tests instances, and unlike test.csv it contains the labels
-        # for the test instances.
-        # It is used if it exists, otherwise "train" and "test" are used for BC, as support for "icml" was added later.
+        # The fer2013.csv and icml_face_data.csv files contain both train and
+        # test instances, and unlike test.csv they contain the labels for the
+        # test instances. We give these 2 files precedence over train.csv and
+        # test.csv. And yes, they both contain the same data, but with different
+        # column names (note the spaces) and ordering:
+        # $ head -n 1 fer2013.csv icml_face_data.csv train.csv test.csv 
+        # ==> fer2013.csv <==
+        # emotion,pixels,Usage
+        # 
+        # ==> icml_face_data.csv <==
+        # emotion, Usage, pixels
+        # 
+        # ==> train.csv <==
+        # emotion,pixels
+        # 
+        # ==> test.csv <==
+        # pixels
+        "fer": ("fer2013.csv", "f8428a1edbd21e88f42c73edd2a14f95"),
         "icml": ("icml_face_data.csv", "b114b9e04e6949e5fe8b6a98b3892b1d"),
     }
 
@@ -42,8 +69,9 @@ def __init__(
         super().__init__(root, transform=transform, target_transform=target_transform)
 
         base_folder = pathlib.Path(self.root) / "fer2013"
-        use_icml = (base_folder / self._RESOURCES["icml"][0]).exists()
-        file_name, md5 = self._RESOURCES["icml" if use_icml else self._split]
+        use_fer_file = (base_folder / self._RESOURCES["fer"][0]).exists()
+        use_icml_file = not use_fer_file and (base_folder / self._RESOURCES["icml"][0]).exists()
+        file_name, md5 = self._RESOURCES["fer" if use_fer_file else "icml" if use_icml_file else self._split]
         data_file = base_folder / file_name
         if not check_integrity(str(data_file), md5=md5):
             raise RuntimeError(
@@ -52,13 +80,14 @@ def __init__(
                 f"https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge"
             )
 
-        pixels_key = " pixels" if use_icml else "pixels"  # yes, for real
+        pixels_key = " pixels" if use_icml_file else "pixels"
+        usage_key = " Usage" if use_icml_file else "Usage"
 
         def get_img(row):
             return torch.tensor([int(idx) for idx in row[pixels_key].split()], dtype=torch.uint8).reshape(48, 48)
 
         def get_label(row):
-            if use_icml or self._split == "train":
+            if use_fer_file or use_icml_file or self._split == "train":
                 return int(row["emotion"])
             else:
                 return None
@@ -66,9 +95,9 @@ def get_label(row):
         with open(data_file, "r", newline="") as file:
             rows = (row for row in csv.DictReader(file))
 
-            if use_icml:
+            if use_fer_file or use_icml_file:
                 valid_keys = ("Training",) if self._split == "train" else ("PublicTest", "PrivateTest")
-                rows = (row for row in rows if row[" Usage"] in valid_keys)
+                rows = (row for row in rows if row[usage_key] in valid_keys)
 
             self._samples = [(get_img(row), get_label(row)) for row in rows]
 

From a2d397b2cffbcce17e539f0a9cef2b71752b0536 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Tue, 4 Jun 2024 10:57:32 +0100
Subject: [PATCH 4/4] lint

---
 test/test_datasets.py           | 12 +++++++-----
 torchvision/datasets/fer2013.py |  8 ++++----
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/test/test_datasets.py b/test/test_datasets.py
index f61149affad..1dc6892c318 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -2446,7 +2446,7 @@ def inject_fake_data(self, tmpdir, config):
         use_fer = config.pop("use_fer", False)
 
         num_samples = 5
-        
+
         if use_icml or use_fer:
             pixels_key, usage_key = (" pixels", " Usage") if use_icml else ("pixels", "Usage")
             fieldnames = ("emotion", usage_key, pixels_key) if use_icml else ("emotion", pixels_key, usage_key)
@@ -2463,8 +2463,9 @@ def inject_fake_data(self, tmpdir, config):
                     row = {
                         "emotion": str(int(torch.randint(0, 7, ()))),
                         usage_key: "Training" if i % 2 else "PublicTest",
-                        pixels_key:" ".join(
-                            str(pixel) for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist()
+                        pixels_key: " ".join(
+                            str(pixel)
+                            for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist()
                         ),
                     }
 
@@ -2481,7 +2482,8 @@ def inject_fake_data(self, tmpdir, config):
                 for _ in range(num_samples):
                     row = dict(
                         pixels=" ".join(
-                            str(pixel) for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist()
+                            str(pixel)
+                            for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist()
                         )
                     )
                     if config["split"] == "train":
@@ -2490,7 +2492,7 @@ def inject_fake_data(self, tmpdir, config):
                     writer.writerow(row)
 
         return num_samples
-    
+
     def test_icml_file(self):
         config = {"split": "test"}
         with self.create_dataset(config=config) as (dataset, _):
diff --git a/torchvision/datasets/fer2013.py b/torchvision/datasets/fer2013.py
index c63b60d09c4..3afda07846b 100644
--- a/torchvision/datasets/fer2013.py
+++ b/torchvision/datasets/fer2013.py
@@ -42,16 +42,16 @@ class FER2013(VisionDataset):
         # test instances. We give these 2 files precedence over train.csv and
         # test.csv. And yes, they both contain the same data, but with different
         # column names (note the spaces) and ordering:
-        # $ head -n 1 fer2013.csv icml_face_data.csv train.csv test.csv 
+        # $ head -n 1 fer2013.csv icml_face_data.csv train.csv test.csv
         # ==> fer2013.csv <==
         # emotion,pixels,Usage
-        # 
+        #
         # ==> icml_face_data.csv <==
         # emotion, Usage, pixels
-        # 
+        #
         # ==> train.csv <==
         # emotion,pixels
-        # 
+        #
         # ==> test.csv <==
         # pixels
         "fer": ("fer2013.csv", "f8428a1edbd21e88f42c73edd2a14f95"),