Unify test_run_transformers* and test_eval_model* scripts #98

Merged
9 commits merged on Jan 19, 2024
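For context: the renamed any_test_* scripts take --use-dc2 and --use-redshift flags to select the data set and model variant, and the new run_all.sh (included below) exercises every supported combination, e.g.:

python any_test_run_transformers.py --use-dc2 --use-redshift --data-dir "./tests/deepdisc/test_data/dc2/"
python any_test_eval_model.py --use-dc2 --use-redshift --testfile "./tests/deepdisc/test_data/dc2/single_test.json"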
81 changes: 63 additions & 18 deletions solo_run_scripts/solo_test_eval_model.py → any_test_eval_model.py
@@ -10,9 +10,10 @@
import deepdisc.astrodet.astrodet as toolkit

from deepdisc.data_format.file_io import get_data_from_json
from deepdisc.data_format.image_readers import HSCImageReader
from deepdisc.inference.match_objects import get_matched_object_classes
from deepdisc.data_format.image_readers import HSCImageReader, DC2ImageReader
from deepdisc.inference.match_objects import get_matched_object_classes, get_matched_z_pdfs
from deepdisc.inference.predictors import return_predictor_transformer
from deepdisc.model.models import RedshiftPDFCasROIHeads
from deepdisc.utils.parse_arguments import dtype_from_args, make_inference_arg_parser

from detectron2 import model_zoo
@@ -63,18 +64,22 @@ def return_predictor(
if __name__ == "__main__":
# --------- Handle args
args = make_inference_arg_parser().parse_args()
print("Command Line Args:", args)

roi_thresh = args.roi_thresh
run_name = args.run_name
testfile = args.testfile
savedir = args.savedir
Path(savedir).mkdir(parents=True, exist_ok=True)
output_dir = args.output_dir
dtype = dtype_from_args(args.datatype)


# --------- Load data
dataset_names = ["test"]
datadir = "/home/shared/hsc/HSC/HSC_DR3/data/"
if args.use_dc2:
datadir = "./tests/deepdisc/test_data/dc2/"
else:
datadir = "/home/shared/hsc/HSC/HSC_DR3/data/"
t0 = time.time()
dataset_dicts = {}
for i, d in enumerate(dataset_names):
@@ -93,6 +98,8 @@ def return_predictor(
cfg = LazyConfig.load(cfgfile)

# --------- Setting a bunch of config stuff
cfg.OUTPUT_DIR = output_dir

cfg.model.roi_heads.num_classes = args.nc

for bp in cfg.model.roi_heads.box_predictors:
@@ -104,29 +111,67 @@

cfg.train.init_checkpoint = os.path.join(output_dir, run_name)

# --------- Now we case predictor on model type (the second case has way different config vals it appears)
if args.use_dc2:
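# DC2 images are six-band, so override the backbone input channels and the per-band pixel stats.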
cfg.model.backbone.bottom_up.in_chans = 6
cfg.model.pixel_mean = [0.05381286, 0.04986344, 0.07526361, 0.10420945, 0.14229655, 0.21245764]
cfg.model.pixel_std = [2.9318833, 1.8443471, 2.581817, 3.5950038, 4.5809164, 7.302009]

if args.use_redshift:
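# Use the redshift-estimating ROI heads so the model predicts a per-object redshift PDF
# (num_components presumably sets the number of mixture components in that PDF).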
cfg.model.roi_heads.num_components = 5
cfg.model.roi_heads._target_ = RedshiftPDFCasROIHeads
#cfg.zloss_factor = 1.0
#cfg.model.zloss_factor = 1.0
cfg.model.roi_heads.zloss_factor = 1.0 #! what's a reasonable default?

#! this maybe shouldn't have been a config value? or should we make a sep config for dc2?
cfg.classes = ["object"]

# --------- Now we case predictor on model type, and if using dc2 data

cfg.OUTPUT_DIR = output_dir
if bb in ['Swin','MViTv2']:
predictor= return_predictor_transformer(cfg)
if args.use_dc2:
output_dir = "."
if bb in ['Swin','MViTv2']:
predictor = return_predictor_transformer(cfg)
else:
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_test_eval_model_option.py"
predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=1, roi_thresh=roi_thresh)
#! nc should be in config, along with making sep config for dc2
else:
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_test_eval_model_option.py"
predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=2, roi_thresh=roi_thresh)
if bb in ['Swin','MViTv2']:
predictor = return_predictor_transformer(cfg)
else:
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_test_eval_model_option.py"
predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=2, roi_thresh=roi_thresh)

# ---------
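# key_mapper functions take a dataset_dict and return the key (a file path, or a list of paths)
# that the image reader uses to load the image.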
def hsc_key_mapper(dataset_dict):
filenames = [
dataset_dict["filename_G"],
dataset_dict["filename_R"],
dataset_dict["filename_I"],
]
return filenames
IR = HSCImageReader(norm=args.norm)
if args.use_dc2:
def dc2_key_mapper(dataset_dict):
filename = dataset_dict["filename"]
return filename
IR = DC2ImageReader(norm=args.norm)

else:
def hsc_key_mapper(dataset_dict):
filenames = [
dataset_dict["filename_G"],
dataset_dict["filename_R"],
dataset_dict["filename_I"],
]
return filenames
IR = HSCImageReader(norm=args.norm)

# --------- Do the thing
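# get_matched_object_classes returns the true and predicted classes for matched objects;
# get_matched_z_pdfs returns true redshifts, predicted redshift PDFs, and matched object ids.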
t0 = time.time()
print("Matching objects")
true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, hsc_key_mapper, predictor)
if args.use_dc2:
true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, dc2_key_mapper, predictor)
if args.use_redshift:
true_zs, pred_pdfs, matched_ids = get_matched_z_pdfs(dataset_dicts["test"], IR, dc2_key_mapper, predictor)
print(true_zs)
print(f"{str(pred_pdfs)[:1000]}...")
else:
true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, hsc_key_mapper, predictor)
classes = np.array([true_classes, pred_classes])

savename = f"{bb}_test_matched_classes.npy"
solo_run_scripts/solo_test_run_transformers.py → any_test_run_transformers.py
@@ -1,10 +1,3 @@
""" Training script for LazyConfig models.

This uses the new "solo config" in which the previous yaml-style config
(a Detectron CfgNode type called cfg_loader) is now bundled into the
LazyConfig type cfg.
"""

try:
# ignore ShapelyDeprecationWarning from fvcore
import warnings
@@ -35,10 +28,10 @@
from detectron2.engine import launch

from deepdisc.data_format.augment_image import hsc_test_augs, train_augs
from deepdisc.data_format.image_readers import HSCImageReader
from deepdisc.data_format.image_readers import DC2ImageReader, HSCImageReader
from deepdisc.data_format.register_data import register_data_set
from deepdisc.model.loaders import DictMapper, return_test_loader, return_train_loader
from deepdisc.model.models import return_lazy_model
from deepdisc.model.loaders import DictMapper, RedshiftDictMapper, return_test_loader, return_train_loader
from deepdisc.model.models import RedshiftPDFCasROIHeads, return_lazy_model
from deepdisc.training.trainers import (
return_evallosshook,
return_lazy_trainer,
@@ -61,19 +54,23 @@ def main(train_head, args):
scheme = args.scheme
alphas = args.alphas
modname = args.modname
dtype = dtype_from_args(args.dtype)
datatype = args.dtype
dtype = dtype_from_args(args.dtype)

# Get file locations
trainfile = dirpath + "single_test.json"
testfile = dirpath + "single_test.json"
if modname == "swin":
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep.py"
elif modname == "mvitv2":
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_mvitv2_b_in21k_100ep.py"
# Vitdet not currently available (cuda issues) so we're tabling it for now
#elif modname == "vitdet":
# cfgfile = "/home/shared/hsc/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_b_100ep.py"

if args.use_dc2:
if modname == "swin":
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep_DC2.py"
elif modname == "mvitv2":
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_mvitv2_b_in21k_100ep_DC2.py"
else:
if modname == "swin":
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep.py"
elif modname == "mvitv2":
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_mvitv2_b_in21k_100ep.py"

# Load the config
cfg = LazyConfig.load(cfgfile)

@@ -90,7 +87,7 @@ def main(train_head, args):
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# Iterations for 15, 25, 35, 50 epochs
# TODOLIV could this stuff be moved to a config too?
#! could this stuff be moved to a config too?
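# epoch here is the number of iterations per epoch (args.tl is presumably the training-set size)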
epoch = int(args.tl / cfg.dataloader.train.total_batch_size)
e1 = 20
e2 = epoch * 10
@@ -101,42 +98,55 @@

if train_head:
cfg.train.init_checkpoint = None # or initwfile, the path to your model

model = return_lazy_model(cfg)

cfg.optimizer.params.model = model
cfg.optimizer.lr = 0.001

cfg.SOLVER.STEPS = [] # do not decay learning rate for retraining
cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
cfg.SOLVER.WARMUP_ITERS = 0
cfg.SOLVER.MAX_ITER = e1 # for DefaultTrainer

# optimizer = instantiate(cfg.optimizer)
cfg.optimizer.params.model = model
cfg.optimizer.lr = 0.001
optimizer = return_optimizer(cfg)

# key_mapper function should take a dataset_dict as input and output a key used by the image_reader function
def hsc_key_mapper(dataset_dict):
filenames = [
dataset_dict["filename_G"],
dataset_dict["filename_R"],
dataset_dict["filename_I"],
]
return filenames

IR = HSCImageReader(norm=args.norm)
mapper = DictMapper(IR, hsc_key_mapper, train_augs).map_data
loader = return_train_loader(cfg, mapper)
test_mapper = DictMapper(IR, hsc_key_mapper, hsc_test_augs).map_data
test_loader = return_test_loader(cfg, test_mapper)
if args.use_dc2:
# key_mapper function should take a dataset_dict as input and output a key used by the image_reader function
def dc2_key_mapper(dataset_dict):
filename = dataset_dict["filename"]
return filename
if args.use_redshift:
IR = DC2ImageReader()
mapper = RedshiftDictMapper(IR, dc2_key_mapper, train_augs).map_data
loader = return_train_loader(cfg, mapper)
test_mapper = RedshiftDictMapper(IR, dc2_key_mapper).map_data
test_loader = return_test_loader(cfg, test_mapper)
else:
IR = DC2ImageReader(norm=args.norm)
mapper = DictMapper(IR, dc2_key_mapper, train_augs).map_data
loader = return_train_loader(cfg, mapper)
test_mapper = DictMapper(IR, dc2_key_mapper).map_data
test_loader = return_test_loader(cfg, test_mapper)
else:
def hsc_key_mapper(dataset_dict):
filenames = [
dataset_dict["filename_G"],
dataset_dict["filename_R"],
dataset_dict["filename_I"],
]
return filenames
IR = HSCImageReader(norm=args.norm)
mapper = DictMapper(IR, hsc_key_mapper, train_augs).map_data
loader = return_train_loader(cfg, mapper)
test_mapper = DictMapper(IR, hsc_key_mapper, hsc_test_augs).map_data
test_loader = return_test_loader(cfg, test_mapper)

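# Hooks: checkpoint saving, evaluation loss every val_per iterations, and LR scheduler stepping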
saveHook = return_savehook(output_name)
lossHook = return_evallosshook(val_per, model, test_loader)
schedulerHook = return_schedulerhook(optimizer)
hookList = [lossHook, schedulerHook, saveHook]

trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList)

trainer.set_period(5)
trainer.train(0, 20)
if comm.is_main_process():
51 changes: 51 additions & 0 deletions run_all.sh
@@ -0,0 +1,51 @@
#!/bin/bash
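# Runs every supported combination of the unified any_test_* scripts and logs their output to ./run_all.log.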

outfile="./run_all.log"

run_start() {
echo "Run:" $(date -u) > $outfile
echo >> $outfile
}

run_line() {
echo python $* "..."

echo "python" $* "..." >> $outfile
echo >> $outfile
python $* >> $outfile
echo >> $outfile
}
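# run_line echoes the command to the console, then appends the command and its full output to $outfile.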

run_end() {
echo "Done."
}

run_start

### test_run_transformers combinations
run_line any_test_run_transformers.py
run_line any_test_run_transformers.py --modname mvitv2 --run-name mvitv2_test
run_line any_test_run_transformers.py --use-dc2 --data-dir "./tests/deepdisc/test_data/dc2/"
run_line any_test_run_transformers.py --use-dc2 --data-dir "./tests/deepdisc/test_data/dc2/" --modname mvitv2 --run-name mvitv2_test
run_line any_test_run_transformers.py --use-dc2 --use-redshift --data-dir "./tests/deepdisc/test_data/dc2/"
run_line any_test_run_transformers.py --use-dc2 --use-redshift --data-dir "./tests/deepdisc/test_data/dc2/" --modname mvitv2 --run-name mvitv2_test


### test_eval_model combinations
run_line any_test_eval_model.py
run_line any_test_eval_model.py --run-name mvitv2_test
run_line any_test_eval_model.py --use-dc2 --testfile "./tests/deepdisc/test_data/dc2/single_test.json"

# The redshift version here could use some looking at. I inferred it from the
# corresponding test_eval_DC2_redshift, but had to add:
# - cfg.model.roi_heads.zloss_factor = 1.0 (arbitrarily choosing 1.0 here)
# - the 3rd expected return value from get_matched_z_pdfs.
# The existence of new_get_matched_z_pdfs makes me think the original script
# could use a revisit, so there may be some outdated things I've copied over.
run_line any_test_eval_model.py --use-dc2 --use-redshift --testfile "./tests/deepdisc/test_data/dc2/single_test.json"

# Not working:
# (RuntimeError: The size of tensor a (6) must match the size of tensor b (3) at non-singleton dimension 0)
#run_line any_test_eval_model.py --use-dc2 --testfile "./tests/deepdisc/test_data/dc2/single_test.json" --run-name mvitv2_test

run_end