diff --git a/torchvision/datasets/stanford_cars.py b/torchvision/datasets/stanford_cars.py index c029ed0d358..6264de82eb7 100644 --- a/torchvision/datasets/stanford_cars.py +++ b/torchvision/datasets/stanford_cars.py @@ -15,6 +15,7 @@ class StanfordCars(VisionDataset): has been split roughly in a 50-50 split The original URL is https://ai.stanford.edu/~jkrause/cars/car_dataset.html, but it is broken. + Follow the instructions in the ``download`` argument to obtain and use the dataset offline. .. note:: @@ -29,8 +30,12 @@ class StanfordCars(VisionDataset): target and transforms it. download (bool, optional): This parameter exists for backward compatibility but it does not download the dataset, since the original URL is not available anymore. The dataset - seems to be available on Kaggle so you can try to manually download it using - `these instructions `_. + seems to be available on Kaggle so you can try to manually download and configure it using + `these instructions `_, + or use an integrated + `dataset on Kaggle `_. + In both cases, first download and configure the dataset locally, and then use the dataset with + ``download=False``. """ def __init__( diff --git a/torchvision/io/video.py b/torchvision/io/video.py index 73c97f37e29..9f768ed555d 100644 --- a/torchvision/io/video.py +++ b/torchvision/io/video.py @@ -62,7 +62,12 @@ def write_video( audio_options: Optional[Dict[str, Any]] = None, ) -> None: """ - Writes a 4d tensor in [T, H, W, C] format in a video file + Writes a 4d tensor in [T, H, W, C] format in a video file. + + This function relies on PyAV (therefore, ultimately FFmpeg) to encode + videos; you can get more fine-grained control by referring to the other + options at your disposal within `the FFmpeg wiki + <https://trac.ffmpeg.org/wiki#Encoding>`_. .. warning:: @@ -78,12 +83,25 @@ def write_video( as a uint8 tensor in [T, H, W, C] format fps (Number): video frames per second video_codec (str): the name of the video codec, i.e. "libx264", "h264", etc. 
- options (Dict): dictionary containing options to be passed into the PyAV video stream + options (Dict): dictionary containing options to be passed into the PyAV video stream. + The available options are codec-dependent and can + be found on `the FFmpeg wiki <https://trac.ffmpeg.org/wiki#Encoding>`_. audio_array (Tensor[C, N]): tensor containing the audio, where C is the number of channels and N is the number of samples audio_fps (Number): audio sample rate, typically 44100 or 48000 audio_codec (str): the name of the audio codec, i.e. "mp3", "aac", etc. - audio_options (Dict): dictionary containing options to be passed into the PyAV audio stream + audio_options (Dict): dictionary containing options to be passed into the PyAV audio stream. + The available options are codec-dependent and can + be found on `the FFmpeg wiki <https://trac.ffmpeg.org/wiki#Encoding>`_. + + Examples:: + >>> # Creating libx264 video with CRF 17, for visually lossless footage: + >>> import torch + >>> from torchvision.io import write_video + >>> # 1000 frames of 100x100, 3-channel image. + >>> vid = torch.randint(0, 256, (1000, 100, 100, 3), dtype=torch.uint8) + >>> write_video("video.mp4", vid, fps=30, options={"crf": "17"}) + """ if not torch.jit.is_scripting() and not torch.jit.is_tracing(): _log_api_usage_once(write_video)