2024-10-12 nightly release (ed55b03)

pytorch · Oct 12, 2024 · 8a2923c · 8a2923c
1 parent 800c366
commit 8a2923c
Show file tree

Hide file tree

Showing 2 changed files with 28 additions and 5 deletions.
diff --git a/torchvision/datasets/stanford_cars.py b/torchvision/datasets/stanford_cars.py
@@ -15,6 +15,7 @@ class StanfordCars(VisionDataset):
     has been split roughly in a 50-50 split
 
     The original URL is https://ai.stanford.edu/~jkrause/cars/car_dataset.html, but it is broken.
+    Follow the instructions in the ``download`` argument to obtain and use the dataset offline.
 
     .. note::
 
@@ -29,8 +30,12 @@ class StanfordCars(VisionDataset):
             target and transforms it.
         download (bool, optional): This parameter exists for backward compatibility but it does not
             download the dataset, since the original URL is not available anymore. The dataset
-            seems to be available on Kaggle so you can try to manually download it using
-            `these instructions <https://github.com/pytorch/vision/issues/7545#issuecomment-1631441616>`_.
+            seems to be available on Kaggle so you can try to manually download and configure it using
+            `these instructions <https://github.com/pytorch/vision/issues/7545#issuecomment-1631441616>`_,
+            or use an integrated
+            `dataset on Kaggle <https://github.com/pytorch/vision/issues/7545#issuecomment-2282674373>`_.
+            In both cases, first download and configure the dataset locally, then use the dataset with
+            ``download=False``.
     """
 
     def __init__(

diff --git a/torchvision/io/video.py b/torchvision/io/video.py
@@ -62,7 +62,12 @@ def write_video(
     audio_options: Optional[Dict[str, Any]] = None,
 ) -> None:
     """
-    Writes a 4d tensor in [T, H, W, C] format in a video file
+    Writes a 4d tensor in [T, H, W, C] format in a video file.
+
+    This function relies on PyAV (and therefore, ultimately, FFmpeg) to encode
+    videos. You can get more fine-grained control by referring to the other
+    options at your disposal within `the FFmpeg wiki
+    <http://trac.ffmpeg.org/wiki#Encoding>`_.
 
     .. warning::
 
@@ -78,12 +83,25 @@ def write_video(
             as a uint8 tensor in [T, H, W, C] format
         fps (Number): video frames per second
         video_codec (str): the name of the video codec, i.e. "libx264", "h264", etc.
-        options (Dict): dictionary containing options to be passed into the PyAV video stream
+        options (Dict): dictionary containing options to be passed into the PyAV video stream.
+            The available options are codec-dependent and can all
+            be found in `the FFmpeg wiki <http://trac.ffmpeg.org/wiki#Encoding>`_.
         audio_array (Tensor[C, N]): tensor containing the audio, where C is the number of channels
             and N is the number of samples
         audio_fps (Number): audio sample rate, typically 44100 or 48000
         audio_codec (str): the name of the audio codec, i.e. "mp3", "aac", etc.
-        audio_options (Dict): dictionary containing options to be passed into the PyAV audio stream
+        audio_options (Dict): dictionary containing options to be passed into the PyAV audio stream.
+            The available options are codec-dependent and can all
+            be found in `the FFmpeg wiki <http://trac.ffmpeg.org/wiki#Encoding>`_.
+
+    Examples::
+        >>> # Creating libx264 video with CRF 17, for visually lossless footage:
+        >>>
+        >>> from torchvision.io import write_video
+        >>> # 1000 frames of 100x100, 3-channel image.
+        >>> vid = torch.randint(0, 256, (1000, 100, 100, 3), dtype=torch.uint8)
+        >>> write_video("video.mp4", vid, fps=30, options={"crf": "17"})
+
     """
     if not torch.jit.is_scripting() and not torch.jit.is_tracing():
         _log_api_usage_once(write_video)