diff --git a/README.md b/README.md index 4b5da8d53..8fbc4dac0 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This is the reference PyTorch implementation for training and testing depth esti > **Digging into Self-Supervised Monocular Depth Prediction** > -> [Clément Godard](http://www0.cs.ucl.ac.uk/staff/C.Godard/), [Oisin Mac Aodha](http://vision.caltech.edu/~macaodha/), [Michael Firman](http://www.michaelfirman.co.uk) and [Gabriel J. Brostow](http://www0.cs.ucl.ac.uk/staff/g.brostow/) +> [Clément Godard](http://www0.cs.ucl.ac.uk/staff/C.Godard/), [Oisin Mac Aodha](http://vision.caltech.edu/~macaodha/), [Michael Firman](http://www.michaelfirman.co.uk) and [Gabriel J. Brostow](http://www0.cs.ucl.ac.uk/staff/g.brostow/) > > [ICCV 2019 (arXiv pdf)](https://arxiv.org/abs/1806.01260) @@ -49,14 +49,20 @@ We also recommend using `pillow-simd` instead of `pillow` for faster image prepr ## 🖼️ Prediction for a single image -You can predict depth for a single image with: +You can predict scaled disparity for a single image with: + ```shell python test_simple.py --image_path assets/test_image.jpg --model_name mono+stereo_640x192 ``` -On its first run this will download the `mono+stereo_640x192` pretrained model (99MB) into the `models/` folder. -We provide the following options for `--model_name`: +or, if you are using a stereo-trained model, you can estimate metric depth with + +```shell +python test_simple.py --image_path assets/test_image.jpg --model_name mono+stereo_640x192 --pred_metric_depth +``` +On its first run either of these commands will download the `mono+stereo_640x192` pretrained model (99MB) into the `models/` folder. +We provide the following options for `--model_name`: | `--model_name` | Training modality | Imagenet pretrained? | Model resolution | KITTI abs. rel. 
error | delta < 1.25 | |-------------------------|-------------------|--------------------------|-----------------|------|----------------| @@ -72,7 +78,7 @@ We provide the following options for `--model_name`: You can also download models trained on the odometry split with [monocular](https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono_odom_640x192.zip) and [mono+stereo](https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono%2Bstereo_odom_640x192.zip) training modalities. -Finally, we provide resnet 50 depth estimation models trained with [ImageNet pretrained weights](https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono_resnet50_640x192.zip) and [trained from scratch](https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono_resnet50_no_pt_640x192.zip). +Finally, we provide resnet 50 depth estimation models trained with [ImageNet pretrained weights](https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono_resnet50_640x192.zip) and [trained from scratch](https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono_resnet50_no_pt_640x192.zip). Make sure to set `--num_layers 50` if using these. ## 💾 KITTI training data diff --git a/datasets/kitti_dataset.py b/datasets/kitti_dataset.py index 81ef28823..120ca35fc 100644 --- a/datasets/kitti_dataset.py +++ b/datasets/kitti_dataset.py @@ -21,7 +21,11 @@ class KITTIDataset(MonoDataset): def __init__(self, *args, **kwargs): super(KITTIDataset, self).__init__(*args, **kwargs) - # NOTE: Make sure your intrinsics matrix is *normalized* by the original image size + # NOTE: Make sure your intrinsics matrix is *normalized* by the original image size. + # To normalize you need to scale the first row by 1 / image_width and the second row + # by 1 / image_height. Monodepth2 assumes the principal point is exactly centered. 
+ # If your principal point is far from the center you might need to disable the horizontal + # flip augmentation. self.K = np.array([[0.58, 0, 0.5, 0], [0, 1.92, 0.5, 0], [0, 0, 1, 0], diff --git a/test_simple.py b/test_simple.py index f4dd77435..d74d63be5 100644 --- a/test_simple.py +++ b/test_simple.py @@ -21,6 +21,7 @@ import networks from layers import disp_to_depth from utils import download_model_if_doesnt_exist +from evaluate_depth import STEREO_SCALE_FACTOR def parse_args(): @@ -46,6 +47,10 @@ def parse_args(): parser.add_argument("--no_cuda", help='if set, disables CUDA', action='store_true') + parser.add_argument("--pred_metric_depth", + help='if set, predicts metric depth instead of disparity. (This only ' + 'makes sense for stereo-trained KITTI models).', + action='store_true') return parser.parse_args() @@ -61,6 +66,10 @@ def test_simple(args): else: device = torch.device("cpu") + if args.pred_metric_depth and "stereo" not in args.model_name: + print("Warning: The --pred_metric_depth flag only makes sense for stereo-trained KITTI " + "models. 
For mono-trained models, output depths will not be in metric space.") + download_model_if_doesnt_exist(args.model_name) model_path = os.path.join("models", args.model_name) print("-> Loading model from ", model_path) @@ -129,9 +138,14 @@ def test_simple(args): # Saving numpy file output_name = os.path.splitext(os.path.basename(image_path))[0] - name_dest_npy = os.path.join(output_directory, "{}_disp.npy".format(output_name)) - scaled_disp, _ = disp_to_depth(disp, 0.1, 100) - np.save(name_dest_npy, scaled_disp.cpu().numpy()) + scaled_disp, depth = disp_to_depth(disp, 0.1, 100) + if args.pred_metric_depth: + name_dest_npy = os.path.join(output_directory, "{}_depth.npy".format(output_name)) + metric_depth = STEREO_SCALE_FACTOR * depth.cpu().numpy() + np.save(name_dest_npy, metric_depth) + else: + name_dest_npy = os.path.join(output_directory, "{}_disp.npy".format(output_name)) + np.save(name_dest_npy, scaled_disp.cpu().numpy()) # Saving colormapped depth image disp_resized_np = disp_resized.squeeze().cpu().numpy() @@ -144,8 +158,10 @@ def test_simple(args): name_dest_im = os.path.join(output_directory, "{}_disp.jpeg".format(output_name)) im.save(name_dest_im) - print(" Processed {:d} of {:d} images - saved prediction to {}".format( - idx + 1, len(paths), name_dest_im)) + print(" Processed {:d} of {:d} images - saved predictions to:".format( + idx + 1, len(paths))) + print(" - {}".format(name_dest_im)) + print(" - {}".format(name_dest_npy)) print('-> Done!')