Unify test_run_transformers* and test_eval_model* scripts #98

Merged
9 commits merged on Jan 19, 2024
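For context: the renamed any_test_* scripts take --use-dc2 and --use-redshift flags to select the data set and model variant, and the new run_all.sh (included below) exercises every supported combination, e.g.:

python any_test_run_transformers.py --use-dc2 --use-redshift --data-dir "./tests/deepdisc/test_data/dc2/"
python any_test_eval_model.py --use-dc2 --use-redshift --testfile "./tests/deepdisc/test_data/dc2/single_test.json"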
81 changes: 63 additions & 18 deletions solo_run_scripts/solo_test_eval_model.py → any_test_eval_model.py
@@ -10,9 +10,10 @@
import deepdisc.astrodet.astrodet as toolkit

from deepdisc.data_format.file_io import get_data_from_json
from deepdisc.data_format.image_readers import HSCImageReader
from deepdisc.inference.match_objects import get_matched_object_classes
from deepdisc.data_format.image_readers import HSCImageReader, DC2ImageReader
from deepdisc.inference.match_objects import get_matched_object_classes, get_matched_z_pdfs
from deepdisc.inference.predictors import return_predictor_transformer
from deepdisc.model.models import RedshiftPDFCasROIHeads
from deepdisc.utils.parse_arguments import dtype_from_args, make_inference_arg_parser

from detectron2 import model_zoo
@@ -63,18 +64,22 @@ def return_predictor(
if __name__ == "__main__":
# --------- Handle args
args = make_inference_arg_parser().parse_args()
print("Command Line Args:", args)

roi_thresh = args.roi_thresh
run_name = args.run_name
testfile = args.testfile
savedir = args.savedir
Path(savedir).mkdir(parents=True, exist_ok=True)
output_dir = args.output_dir
dtype = dtype_from_args(args.datatype)


# --------- Load data
dataset_names = ["test"]
datadir = "/home/shared/hsc/HSC/HSC_DR3/data/"
if args.use_dc2:
datadir = "./tests/deepdisc/test_data/dc2/"
else:
datadir = "/home/shared/hsc/HSC/HSC_DR3/data/"
t0 = time.time()
dataset_dicts = {}
for i, d in enumerate(dataset_names):
@@ -93,6 +98,8 @@ def return_predictor(
cfg = LazyConfig.load(cfgfile)

# --------- Setting a bunch of config stuff
cfg.OUTPUT_DIR = output_dir

cfg.model.roi_heads.num_classes = args.nc

for bp in cfg.model.roi_heads.box_predictors:
@@ -104,29 +111,67 @@

cfg.train.init_checkpoint = os.path.join(output_dir, run_name)

# --------- Now we case predictor on model type (the second case has way different config vals it appears)
if args.use_dc2:
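# DC2 images are six-band, so override the backbone input channels and the per-band pixel stats.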
cfg.model.backbone.bottom_up.in_chans = 6
cfg.model.pixel_mean = [0.05381286, 0.04986344, 0.07526361, 0.10420945, 0.14229655, 0.21245764]
cfg.model.pixel_std = [2.9318833, 1.8443471, 2.581817, 3.5950038, 4.5809164, 7.302009]

if args.use_redshift:
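# Use the redshift-estimating ROI heads so the model predicts a per-object redshift PDF
# (num_components presumably sets the number of mixture components in that PDF).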
cfg.model.roi_heads.num_components = 5
cfg.model.roi_heads._target_ = RedshiftPDFCasROIHeads
#cfg.zloss_factor = 1.0
#cfg.model.zloss_factor = 1.0
cfg.model.roi_heads.zloss_factor = 1.0 #! what's a reasonable default?

#! this maybe shouldn't have been a config value? or should we make a sep config for dc2?
cfg.classes = ["object"]

# --------- Now we case predictor on model type, and if using dc2 data

cfg.OUTPUT_DIR = output_dir
if bb in ['Swin','MViTv2']:
predictor= return_predictor_transformer(cfg)
if args.use_dc2:
output_dir = "."
if bb in ['Swin','MViTv2']:
predictor = return_predictor_transformer(cfg)
else:
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_test_eval_model_option.py"
predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=1, roi_thresh=roi_thresh)
#! nc should be in config, along with making sep config for dc2
else:
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_test_eval_model_option.py"
predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=2, roi_thresh=roi_thresh)
if bb in ['Swin','MViTv2']:
predictor = return_predictor_transformer(cfg)
else:
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_test_eval_model_option.py"
predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=2, roi_thresh=roi_thresh)

# ---------
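# key_mapper functions take a dataset_dict and return the key (a file path, or a list of paths)
# that the image reader uses to load the image.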
def hsc_key_mapper(dataset_dict):
filenames = [
dataset_dict["filename_G"],
dataset_dict["filename_R"],
dataset_dict["filename_I"],
]
return filenames
IR = HSCImageReader(norm=args.norm)
if args.use_dc2:
def dc2_key_mapper(dataset_dict):
filename = dataset_dict["filename"]
return filename
IR = DC2ImageReader(norm=args.norm)

else:
def hsc_key_mapper(dataset_dict):
filenames = [
dataset_dict["filename_G"],
dataset_dict["filename_R"],
dataset_dict["filename_I"],
]
return filenames
IR = HSCImageReader(norm=args.norm)

# --------- Do the thing
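# get_matched_object_classes returns the true and predicted classes for matched objects;
# get_matched_z_pdfs returns true redshifts, predicted redshift PDFs, and matched object ids.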
t0 = time.time()
print("Matching objects")
true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, hsc_key_mapper, predictor)
if args.use_dc2:
true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, dc2_key_mapper, predictor)
if args.use_redshift:
true_zs, pred_pdfs, matched_ids = get_matched_z_pdfs(dataset_dicts["test"], IR, dc2_key_mapper, predictor)
print(true_zs)
print(f"{str(pred_pdfs)[:1000]}...")
else:
true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, hsc_key_mapper, predictor)
classes = np.array([true_classes, pred_classes])

savename = f"{bb}_test_matched_classes.npy"
solo_run_scripts/solo_test_run_transformers.py → any_test_run_transformers.py
@@ -1,10 +1,3 @@
""" Training script for LazyConfig models.

This uses the new "solo config" in which the previous yaml-style config
(a Detectron CfgNode type called cfg_loader) is now bundled into the
LazyConfig type cfg.
"""

try:
# ignore ShapelyDeprecationWarning from fvcore
import warnings
@@ -35,10 +28,10 @@
from detectron2.engine import launch

from deepdisc.data_format.augment_image import hsc_test_augs, train_augs
from deepdisc.data_format.image_readers import HSCImageReader
from deepdisc.data_format.image_readers import DC2ImageReader, HSCImageReader
from deepdisc.data_format.register_data import register_data_set
from deepdisc.model.loaders import DictMapper, return_test_loader, return_train_loader
from deepdisc.model.models import return_lazy_model
from deepdisc.model.loaders import DictMapper, RedshiftDictMapper, return_test_loader, return_train_loader
from deepdisc.model.models import RedshiftPDFCasROIHeads, return_lazy_model
from deepdisc.training.trainers import (
return_evallosshook,
return_lazy_trainer,
@@ -61,19 +54,23 @@ def main(train_head, args):
scheme = args.scheme
alphas = args.alphas
modname = args.modname
dtype = dtype_from_args(args.dtype)
datatype = args.dtype
dtype = dtype_from_args(args.dtype)

# Get file locations
trainfile = dirpath + "single_test.json"
testfile = dirpath + "single_test.json"
if modname == "swin":
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep.py"
elif modname == "mvitv2":
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_mvitv2_b_in21k_100ep.py"
# Vitdet not currently available (cuda issues) so we're tabling it for now
#elif modname == "vitdet":
# cfgfile = "/home/shared/hsc/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_b_100ep.py"

if args.use_dc2:
if modname == "swin":
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep_DC2.py"
elif modname == "mvitv2":
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_mvitv2_b_in21k_100ep_DC2.py"
else:
if modname == "swin":
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep.py"
elif modname == "mvitv2":
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_mvitv2_b_in21k_100ep.py"

# Load the config
cfg = LazyConfig.load(cfgfile)

@@ -90,7 +87,7 @@ def main(train_head, args):
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# Iterations for 15, 25, 35, 50 epochs
# TODOLIV could this stuff be moved to a config too?
#! could this stuff be moved to a config too?
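# epoch here is the number of iterations per epoch (args.tl is presumably the training-set size)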
epoch = int(args.tl / cfg.dataloader.train.total_batch_size)
e1 = 20
e2 = epoch * 10
@@ -101,42 +98,55 @@

if train_head:
cfg.train.init_checkpoint = None # or initwfile, the path to your model

model = return_lazy_model(cfg)

cfg.optimizer.params.model = model
cfg.optimizer.lr = 0.001

cfg.SOLVER.STEPS = [] # do not decay learning rate for retraining
cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
cfg.SOLVER.WARMUP_ITERS = 0
cfg.SOLVER.MAX_ITER = e1 # for DefaultTrainer

# optimizer = instantiate(cfg.optimizer)
cfg.optimizer.params.model = model
cfg.optimizer.lr = 0.001
optimizer = return_optimizer(cfg)

# key_mapper function should take a dataset_dict as input and output a key used by the image_reader function
def hsc_key_mapper(dataset_dict):
filenames = [
dataset_dict["filename_G"],
dataset_dict["filename_R"],
dataset_dict["filename_I"],
]
return filenames

IR = HSCImageReader(norm=args.norm)
mapper = DictMapper(IR, hsc_key_mapper, train_augs).map_data
loader = return_train_loader(cfg, mapper)
test_mapper = DictMapper(IR, hsc_key_mapper, hsc_test_augs).map_data
test_loader = return_test_loader(cfg, test_mapper)
if args.use_dc2:
# key_mapper function should take a dataset_dict as input and output a key used by the image_reader function
def dc2_key_mapper(dataset_dict):
filename = dataset_dict["filename"]
return filename
if args.use_redshift:
IR = DC2ImageReader()
mapper = RedshiftDictMapper(IR, dc2_key_mapper, train_augs).map_data
loader = return_train_loader(cfg, mapper)
test_mapper = RedshiftDictMapper(IR, dc2_key_mapper).map_data
test_loader = return_test_loader(cfg, test_mapper)
else:
IR = DC2ImageReader(norm=args.norm)
mapper = DictMapper(IR, dc2_key_mapper, train_augs).map_data
loader = return_train_loader(cfg, mapper)
test_mapper = DictMapper(IR, dc2_key_mapper).map_data
test_loader = return_test_loader(cfg, test_mapper)
else:
def hsc_key_mapper(dataset_dict):
filenames = [
dataset_dict["filename_G"],
dataset_dict["filename_R"],
dataset_dict["filename_I"],
]
return filenames
IR = HSCImageReader(norm=args.norm)
mapper = DictMapper(IR, hsc_key_mapper, train_augs).map_data
loader = return_train_loader(cfg, mapper)
test_mapper = DictMapper(IR, hsc_key_mapper, hsc_test_augs).map_data
test_loader = return_test_loader(cfg, test_mapper)

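# Hooks: checkpoint saving, evaluation loss every val_per iterations, and LR scheduler stepping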
saveHook = return_savehook(output_name)
lossHook = return_evallosshook(val_per, model, test_loader)
schedulerHook = return_schedulerhook(optimizer)
hookList = [lossHook, schedulerHook, saveHook]

trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList)

trainer.set_period(5)
trainer.train(0, 20)
if comm.is_main_process():
51 changes: 51 additions & 0 deletions run_all.sh
@@ -0,0 +1,51 @@
#!/bin/bash
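# Runs every supported combination of the unified any_test_* scripts and logs their output to ./run_all.log.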

outfile="./run_all.log"

run_start() {
echo "Run:" $(date -u) > $outfile
echo >> $outfile
}

run_line() {
echo python $* "..."

echo "python" $* "..." >> $outfile
echo >> $outfile
python $* >> $outfile
echo >> $outfile
}
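# run_line echoes the command to the console, then appends the command and its full output to $outfile.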

run_end() {
echo "Done."
}

run_start

### test_run_transformers combinations
run_line any_test_run_transformers.py
run_line any_test_run_transformers.py --modname mvitv2 --run-name mvitv2_test
run_line any_test_run_transformers.py --use-dc2 --data-dir "./tests/deepdisc/test_data/dc2/"
run_line any_test_run_transformers.py --use-dc2 --data-dir "./tests/deepdisc/test_data/dc2/" --modname mvitv2 --run-name mvitv2_test
run_line any_test_run_transformers.py --use-dc2 --use-redshift --data-dir "./tests/deepdisc/test_data/dc2/"
run_line any_test_run_transformers.py --use-dc2 --use-redshift --data-dir "./tests/deepdisc/test_data/dc2/" --modname mvitv2 --run-name mvitv2_test


### test_eval_model combinations
run_line any_test_eval_model.py
run_line any_test_eval_model.py --run-name mvitv2_test
run_line any_test_eval_model.py --use-dc2 --testfile "./tests/deepdisc/test_data/dc2/single_test.json"

# The redshift version here could use some looking at. I inferred it from the
# corresponding test_eval_DC2_redshift, but had to add:
# - cfg.model.roi_heads.zloss_factor = 1.0 (arbitrarily choosing 1.0 here)
# - the 3rd expected return value from get_matched_z_pdfs.
# The existence of new_get_matched_z_pdfs makes me think the original script
# could use a revisit, so there may be some outdated things I've copied over.
run_line any_test_eval_model.py --use-dc2 --use-redshift --testfile "./tests/deepdisc/test_data/dc2/single_test.json"

# Not working:
# (RuntimeError: The size of tensor a (6) must match the size of tensor b (3) at non-singleton dimension 0)
#run_line any_test_eval_model.py --use-dc2 --testfile "./tests/deepdisc/test_data/dc2/single_test.json" --run-name mvitv2_test

run_end