Upgrade to Cog version 0.1 #82

Open
wants to merge 1 commit into base: main
predict.py: 86 changes (43 additions & 43 deletions)
@@ -1,85 +1,85 @@
 import sys
 import cv2
 import tempfile
 from pathlib import Path
-import cog
 import time
 import tempfile
+from cog import BasePredictor, Input, Path
 
 # import some common detectron2 utilities
 from detectron2.engine import DefaultPredictor
 from detectron2.config import get_cfg
 from detectron2.utils.visualizer import Visualizer
 from detectron2.data import MetadataCatalog
 
 # Detic libraries
-sys.path.insert(0, 'third_party/CenterNet2/')
+sys.path.insert(0, "third_party/CenterNet2/projects/CenterNet2/")
 from centernet.config import add_centernet_config
 from detic.config import add_detic_config
 from detic.modeling.utils import reset_cls_test
 from detic.modeling.text.text_encoder import build_text_encoder
 
-class Predictor(cog.Predictor):
+
+class Predictor(BasePredictor):
     def setup(self):
         cfg = get_cfg()
         add_centernet_config(cfg)
         add_detic_config(cfg)
-        cfg.merge_from_file("configs/Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml")
-        cfg.MODEL.WEIGHTS = 'Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.pth'
+        cfg.merge_from_file(
+            "configs/Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml"
+        )
+        cfg.MODEL.WEIGHTS = "Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.pth"
         cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
-        cfg.MODEL.ROI_BOX_HEAD.ZEROSHOT_WEIGHT_PATH = 'rand'
+        cfg.MODEL.ROI_BOX_HEAD.ZEROSHOT_WEIGHT_PATH = "rand"
         cfg.MODEL.ROI_HEADS.ONE_CLASS_PER_PROPOSAL = True
         self.predictor = DefaultPredictor(cfg)
         self.BUILDIN_CLASSIFIER = {
-            'lvis': 'datasets/metadata/lvis_v1_clip_a+cname.npy',
-            'objects365': 'datasets/metadata/o365_clip_a+cnamefix.npy',
-            'openimages': 'datasets/metadata/oid_clip_a+cname.npy',
-            'coco': 'datasets/metadata/coco_clip_a+cname.npy',
+            "lvis": "datasets/metadata/lvis_v1_clip_a+cname.npy",
+            "objects365": "datasets/metadata/o365_clip_a+cnamefix.npy",
+            "openimages": "datasets/metadata/oid_clip_a+cname.npy",
+            "coco": "datasets/metadata/coco_clip_a+cname.npy",
         }
         self.BUILDIN_METADATA_PATH = {
-            'lvis': 'lvis_v1_val',
-            'objects365': 'objects365_v2_val',
-            'openimages': 'oid_val_expanded',
-            'coco': 'coco_2017_val',
+            "lvis": "lvis_v1_val",
+            "objects365": "objects365_v2_val",
+            "openimages": "oid_val_expanded",
+            "coco": "coco_2017_val",
         }
 
-    @cog.input(
-        "image",
-        type=Path,
-        help="input image",
-    )
-    @cog.input(
-        "vocabulary",
-        type=str,
-        default='lvis',
-        options=['lvis', 'objects365', 'openimages', 'coco', 'custom'],
-        help="Choose vocabulary",
-    )
-    @cog.input(
-        "custom_vocabulary",
-        type=str,
-        default=None,
-        help="Type your own vocabularies, separated by coma ','",
-    )
-    def predict(self, image, vocabulary, custom_vocabulary):
+    def predict(
+        self,
+        image: Path = Input(description="Input image"),
+        vocabulary: str = Input(
+            default="lvis",
+            choices=["lvis", "objects365", "openimages", "coco", "custom"],
+            description="Choose vocabulary",
+        ),
+        custom_vocabulary: str = Input(
+            default=None,
+            description="Type your own vocabularies, separated by comma ','",
+        ),
+    ) -> Path:
         image = cv2.imread(str(image))
-        if not vocabulary == 'custom':
+        if not vocabulary == "custom":
             metadata = MetadataCatalog.get(self.BUILDIN_METADATA_PATH[vocabulary])
             classifier = self.BUILDIN_CLASSIFIER[vocabulary]
             num_classes = len(metadata.thing_classes)
             reset_cls_test(self.predictor.model, classifier, num_classes)
 
         else:
-            assert custom_vocabulary is not None and len(custom_vocabulary.split(',')) > 0, \
-                "Please provide your own vocabularies when vocabulary is set to 'custom'."
+            assert (
+                custom_vocabulary is not None and len(custom_vocabulary.split(",")) > 0
+            ), "Please provide your own vocabularies when vocabulary is set to 'custom'."
             metadata = MetadataCatalog.get(str(time.time()))
-            metadata.thing_classes = custom_vocabulary.split(',')
+            metadata.thing_classes = custom_vocabulary.split(",")
             classifier = get_clip_embeddings(metadata.thing_classes)
             num_classes = len(metadata.thing_classes)
             reset_cls_test(self.predictor.model, classifier, num_classes)
             # Reset visualization threshold
             output_score_threshold = 0.3
-            for cascade_stages in range(len(self.predictor.model.roi_heads.box_predictor)):
-                self.predictor.model.roi_heads.box_predictor[cascade_stages].test_score_thresh = output_score_threshold
+            for cascade_stages in range(
+                len(self.predictor.model.roi_heads.box_predictor)
+            ):
+                self.predictor.model.roi_heads.box_predictor[
+                    cascade_stages
+                ].test_score_thresh = output_score_threshold
 
         outputs = self.predictor(image)
         v = Visualizer(image[:, :, ::-1], metadata)
@@ -89,7 +89,7 @@ def predict(self, image, vocabulary, custom_vocabulary):
         return out_path
 
 
-def get_clip_embeddings(vocabulary, prompt='a '):
+def get_clip_embeddings(vocabulary, prompt="a "):
     text_encoder = build_text_encoder(pretrain=True)
     text_encoder.eval()
     texts = [prompt + x for x in vocabulary]
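For reviewers unfamiliar with the 0.1 interface, the whole migration boils down to one pattern: the pre-0.1 cog.Predictor subclass declared its inputs with stacked @cog.input decorators, while the 0.1 BasePredictor declares them as typed keyword arguments with Input() defaults and a return-type annotation. Below is a minimal sketch of that pattern, assuming cog >= 0.1 is installed; the greeting example is illustrative only and not part of this diff.

from cog import BasePredictor, Input


class Predictor(BasePredictor):
    def setup(self):
        # Runs once per container start; load weights here, just as the
        # Detic predictor builds its cfg and DefaultPredictor in setup().
        self.suffix = "!"

    def predict(
        self,
        text: str = Input(description="Text to shout", default="hello"),
        repeat: int = Input(description="How many times to repeat", default=1),
    ) -> str:
        # The return annotation declares the output type, replacing the
        # implicit output handling of the old @cog.input style.
        return (text.upper() + self.suffix) * repeat

Inputs declared this way surface as flags on the Cog CLI, so the Detic model in this diff should be invocable along the lines of cog predict -i image=@input.jpg -i vocabulary=coco, using Cog's -i name=value syntax and @file shorthand for file inputs.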