diff --git a/solo_run_scripts/solo_test_eval_model.py b/any_test_eval_model.py
similarity index 56%
rename from solo_run_scripts/solo_test_eval_model.py
rename to any_test_eval_model.py
index e13585e..85d188f 100644
--- a/solo_run_scripts/solo_test_eval_model.py
+++ b/any_test_eval_model.py
@@ -10,9 +10,10 @@
 import deepdisc.astrodet.astrodet as toolkit
 
 from deepdisc.data_format.file_io import get_data_from_json
-from deepdisc.data_format.image_readers import HSCImageReader
-from deepdisc.inference.match_objects import get_matched_object_classes
+from deepdisc.data_format.image_readers import HSCImageReader, DC2ImageReader
+from deepdisc.inference.match_objects import get_matched_object_classes, get_matched_z_pdfs
 from deepdisc.inference.predictors import return_predictor_transformer
+from deepdisc.model.models import RedshiftPDFCasROIHeads
 from deepdisc.utils.parse_arguments import dtype_from_args, make_inference_arg_parser
 
 from detectron2 import model_zoo
@@ -63,6 +64,8 @@ def return_predictor(
 if __name__ == "__main__":
     # --------- Handle args
     args = make_inference_arg_parser().parse_args()
+    print("Command Line Args:", args)
+
     roi_thresh = args.roi_thresh
     run_name = args.run_name
     testfile = args.testfile
@@ -70,11 +73,13 @@ def return_predictor(
     Path(savedir).mkdir(parents=True, exist_ok=True)
     output_dir = args.output_dir
     dtype=dtype_from_args(args.datatype)
-
     # --------- Load data
     dataset_names = ["test"]
-    datadir = "/home/shared/hsc/HSC/HSC_DR3/data/"
+    if args.use_dc2:
+        datadir = "./tests/deepdisc/test_data/dc2/"
+    else:
+        datadir = "/home/shared/hsc/HSC/HSC_DR3/data/"
     t0 = time.time()
     dataset_dicts = {}
     for i, d in enumerate(dataset_names):
@@ -93,6 +98,8 @@ def return_predictor(
     cfg = LazyConfig.load(cfgfile)
 
     # --------- Setting a bunch of config stuff
+    cfg.OUTPUT_DIR = output_dir
+
     cfg.model.roi_heads.num_classes = args.nc
 
     for bp in cfg.model.roi_heads.box_predictors:
@@ -104,29 +111,67 @@ def return_predictor(
 
     cfg.train.init_checkpoint = os.path.join(output_dir, run_name)
 
-    # --------- Now we case predictor on model type (the second case has way different config vals it appears)
+    if args.use_dc2:
+        cfg.model.backbone.bottom_up.in_chans = 6
+        cfg.model.pixel_mean = [0.05381286, 0.04986344, 0.07526361, 0.10420945, 0.14229655, 0.21245764]
+        cfg.model.pixel_std = [2.9318833, 1.8443471, 2.581817, 3.5950038, 4.5809164, 7.302009]
+
+        if args.use_redshift:
+            cfg.model.roi_heads.num_components=5
+            cfg.model.roi_heads._target_ = RedshiftPDFCasROIHeads
+            #cfg.zloss_factor = 1.0
+            #cfg.model.zloss_factor = 1.0
+            cfg.model.roi_heads.zloss_factor = 1.0 #! what's a reasonable default?
+
+        #! this maybe shouldn't have been a config value? or should we make a sep config for dc2?
+        cfg.classes = ["object"]
+
+    # --------- Now we choose the predictor based on model type and on whether we're using DC2 data
     cfg.OUTPUT_DIR = output_dir
-    if bb in ['Swin','MViTv2']:
-        predictor= return_predictor_transformer(cfg)
+    if args.use_dc2:
+        output_dir = "."
+        if bb in ['Swin','MViTv2']:
+            predictor= return_predictor_transformer(cfg)
+        else:
+            cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_test_eval_model_option.py"
+            predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=1, roi_thresh=roi_thresh)
+            #! nc should be in config, along with making a sep config for dc2
     else:
-        cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_test_eval_model_option.py"
-        predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=2, roi_thresh=roi_thresh)
+        if bb in ['Swin','MViTv2']:
+            predictor= return_predictor_transformer(cfg)
+        else:
+            cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_test_eval_model_option.py"
+            predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=2, roi_thresh=roi_thresh)
 
     # ---------
-    def hsc_key_mapper(dataset_dict):
-        filenames = [
-            dataset_dict["filename_G"],
-            dataset_dict["filename_R"],
-            dataset_dict["filename_I"],
-        ]
-        return filenames
-    IR = HSCImageReader(norm=args.norm)
+    if args.use_dc2:
+        def dc2_key_mapper(dataset_dict):
+            filename = dataset_dict["filename"]
+            return filename
+        IR = DC2ImageReader(norm=args.norm)
+
+    else:
+        def hsc_key_mapper(dataset_dict):
+            filenames = [
+                dataset_dict["filename_G"],
+                dataset_dict["filename_R"],
+                dataset_dict["filename_I"],
+            ]
+            return filenames
+        IR = HSCImageReader(norm=args.norm)
 
     # --------- Do the thing
     t0 = time.time()
     print("Matching objects")
-    true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, hsc_key_mapper, predictor)
+    if args.use_dc2:
+        true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, dc2_key_mapper, predictor)
+        if args.use_redshift:
+            true_zs, pred_pdfs, matched_ids = get_matched_z_pdfs(dataset_dicts["test"], IR, dc2_key_mapper, predictor)
+            print(true_zs)
+            print(f"{str(pred_pdfs)[:1000]}...")
+    else:
+        true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, hsc_key_mapper, predictor)
 
     classes = np.array([true_classes, pred_classes])
 
     savename = f"{bb}_test_matched_classes.npy"
diff --git a/solo_run_scripts/solo_test_run_transformers.py b/any_test_run_transformers.py
similarity index 60%
rename from solo_run_scripts/solo_test_run_transformers.py
rename to any_test_run_transformers.py
index 58fb058..a68705d 100644
--- a/solo_run_scripts/solo_test_run_transformers.py
+++ b/any_test_run_transformers.py
@@ -1,10 +1,3 @@
-""" Training script for LazyConfig models.
-
-This uses the new "solo config" in which the previous yaml-style config
-(a Detectron CfgNode type called cfg_loader) is now bundled into the
-LazyConfig type cfg.
-""" - try: # ignore ShapelyDeprecationWarning from fvcore import warnings @@ -35,10 +28,10 @@ from detectron2.engine import launch from deepdisc.data_format.augment_image import hsc_test_augs, train_augs -from deepdisc.data_format.image_readers import HSCImageReader +from deepdisc.data_format.image_readers import DC2ImageReader, HSCImageReader from deepdisc.data_format.register_data import register_data_set -from deepdisc.model.loaders import DictMapper, return_test_loader, return_train_loader -from deepdisc.model.models import return_lazy_model +from deepdisc.model.loaders import DictMapper, RedshiftDictMapper, return_test_loader, return_train_loader +from deepdisc.model.models import RedshiftPDFCasROIHeads, return_lazy_model from deepdisc.training.trainers import ( return_evallosshook, return_lazy_trainer, @@ -61,19 +54,23 @@ def main(train_head, args): scheme = args.scheme alphas = args.alphas modname = args.modname - dtype = dtype_from_args(args.dtype) + datatype = args.dtype + dtype = dtype_from_args(args.dtype) # Get file locations trainfile = dirpath + "single_test.json" testfile = dirpath + "single_test.json" - if modname == "swin": - cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep.py" - elif modname == "mvitv2": - cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_mvitv2_b_in21k_100ep.py" - # Vitdet not currently available (cuda issues) so we're tabling it for now - #elif modname == "vitdet": - # cfgfile = "/home/shared/hsc/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_b_100ep.py" - + if args.use_dc2: + if modname == "swin": + cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep_DC2.py" + elif modname == "mvitv2": + cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_mvitv2_b_in21k_100ep_DC2.py" + else: + if modname == "swin": + cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep.py" + elif modname == "mvitv2": + cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_mvitv2_b_in21k_100ep.py" + # Load the config cfg = LazyConfig.load(cfgfile) @@ -90,7 +87,7 @@ def main(train_head, args): os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) # Iterations for 15, 25, 35, 50 epochs - # TODOLIV could this stuff be moved to a config too? + #! could this stuff be moved to a config too? 
     epoch = int(args.tl / cfg.dataloader.train.total_batch_size)
     e1 = 20
     e2 = epoch * 10
     e3 = epoch * 20
     efinal = epoch * 35
@@ -101,34 +98,48 @@
     val_per = 5
 
     if train_head:
         cfg.train.init_checkpoint = None  # or initwfile, the path to your model
-
+
     model = return_lazy_model(cfg)
 
-    cfg.optimizer.params.model = model
-    cfg.optimizer.lr = 0.001
-
     cfg.SOLVER.STEPS = []  # do not decay learning rate for retraining
     cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
     cfg.SOLVER.WARMUP_ITERS = 0
     cfg.SOLVER.MAX_ITER = e1  # for DefaultTrainer
 
-    # optimizer = instantiate(cfg.optimizer)
+    cfg.optimizer.params.model = model
+    cfg.optimizer.lr = 0.001
     optimizer = return_optimizer(cfg)
 
-    # key_mapper function should take a dataset_dict as input and output a key used by the image_reader function
-    def hsc_key_mapper(dataset_dict):
-        filenames = [
-            dataset_dict["filename_G"],
-            dataset_dict["filename_R"],
-            dataset_dict["filename_I"],
-        ]
-        return filenames
-
-    IR = HSCImageReader(norm=args.norm)
-    mapper = DictMapper(IR, hsc_key_mapper, train_augs).map_data
-    loader = return_train_loader(cfg, mapper)
-    test_mapper = DictMapper(IR, hsc_key_mapper, hsc_test_augs).map_data
-    test_loader = return_test_loader(cfg, test_mapper)
+    if args.use_dc2:
+        # key_mapper function should take a dataset_dict as input and output a key used by the image_reader function
+        def dc2_key_mapper(dataset_dict):
+            filename = dataset_dict["filename"]
+            return filename
+        if args.use_redshift:
+            IR = DC2ImageReader()
+            mapper = RedshiftDictMapper(IR, dc2_key_mapper, train_augs).map_data
+            loader = return_train_loader(cfg, mapper)
+            test_mapper = RedshiftDictMapper(IR, dc2_key_mapper).map_data
+            test_loader = return_test_loader(cfg, test_mapper)
+        else:
+            IR = DC2ImageReader(norm=args.norm)
+            mapper = DictMapper(IR, dc2_key_mapper, train_augs).map_data
+            loader = return_train_loader(cfg, mapper)
+            test_mapper = DictMapper(IR, dc2_key_mapper).map_data
+            test_loader = return_test_loader(cfg, test_mapper)
+    else:
+        def hsc_key_mapper(dataset_dict):
+            filenames = [
+                dataset_dict["filename_G"],
+                dataset_dict["filename_R"],
+                dataset_dict["filename_I"],
+            ]
+            return filenames
+        IR = HSCImageReader(norm=args.norm)
+        mapper = DictMapper(IR, hsc_key_mapper, train_augs).map_data
+        loader = return_train_loader(cfg, mapper)
+        test_mapper = DictMapper(IR, hsc_key_mapper, hsc_test_augs).map_data
+        test_loader = return_test_loader(cfg, test_mapper)
 
     saveHook = return_savehook(output_name)
     lossHook = return_evallosshook(val_per, model, test_loader)
@@ -136,7 +147,6 @@ def hsc_key_mapper(dataset_dict):
     hookList = [lossHook, schedulerHook, saveHook]
 
     trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList)
-    trainer.set_period(5)
     trainer.train(0, 20)
 
     if comm.is_main_process():
diff --git a/run_all.sh b/run_all.sh
new file mode 100644
index 0000000..ee88914
--- /dev/null
+++ b/run_all.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+outfile="./run_all.log"
+
+run_start() {
+  echo "Run:" $(date -u) > $outfile
+  echo >> $outfile
+}
+
+run_line() {
+  echo python $* "..."
+
+  echo "python" $* "..." >> $outfile
+  echo >> $outfile
+  python $* >> $outfile
+  echo >> $outfile
+}
+
+run_end() {
+  echo "Done."
+}
+
+run_start
+
+### test_run_transformers combinations
+run_line any_test_run_transformers.py
+run_line any_test_run_transformers.py --modname mvitv2 --run-name mvitv2_test
+run_line any_test_run_transformers.py --use-dc2 --data-dir "./tests/deepdisc/test_data/dc2/"
+run_line any_test_run_transformers.py --use-dc2 --data-dir "./tests/deepdisc/test_data/dc2/" --modname mvitv2 --run-name mvitv2_test
+run_line any_test_run_transformers.py --use-dc2 --use-redshift --data-dir "./tests/deepdisc/test_data/dc2/"
+run_line any_test_run_transformers.py --use-dc2 --use-redshift --data-dir "./tests/deepdisc/test_data/dc2/" --modname mvitv2 --run-name mvitv2_test
+
+
+### test_eval_model combinations
+run_line any_test_eval_model.py
+run_line any_test_eval_model.py --run-name mvitv2_test
+run_line any_test_eval_model.py --use-dc2 --testfile "./tests/deepdisc/test_data/dc2/single_test.json"
+
+# The redshift version here could use some looking at. I inferred it from the
+# corresponding test_eval_DC2_redshift, but had to add:
+# - cfg.model.roi_heads.zloss_factor = 1.0 (arbitrarily choosing 1.0 here)
+# - handling the 3rd expected return value from get_matched_z_pdfs.
+# The existence of new_get_matched_z_pdfs makes me think the original script
+# could use a revisit, so there may be some outdated things I've copied over.
+run_line any_test_eval_model.py --use-dc2 --use-redshift --testfile "./tests/deepdisc/test_data/dc2/single_test.json"
+
+# Not working:
+# (RuntimeError: The size of tensor a (6) must match the size of tensor b (3) at non-singleton dimension 0)
+#run_line any_test_eval_model.py --use-dc2 --testfile "./tests/deepdisc/test_data/dc2/single_test.json" --run-name mvitv2_test
+
+run_end
\ No newline at end of file
diff --git a/solo_run_scripts/solo_test_run_transformers_DC2.py b/solo_run_scripts/solo_test_run_transformers_DC2.py
deleted file mode 100644
index 7525da1..0000000
--- a/solo_run_scripts/solo_test_run_transformers_DC2.py
+++ /dev/null
@@ -1,170 +0,0 @@
-""" Training script for LazyConfig models.
-
-This uses the new "solo config" in which the previous yaml-style config
-(a Detectron CfgNode type called cfg_loader) is now bundled into the
-LazyConfig type cfg.
-""" - -try: - # ignore ShapelyDeprecationWarning from fvcore - import warnings - from shapely.errors import ShapelyDeprecationWarning - warnings.filterwarnings("ignore", category=sShapelyDeprecationWarning) -except: - pass -warnings.filterwarnings("ignore", category=RuntimeWarning) -warnings.filterwarnings("ignore", category=UserWarning) - -# Some basic setup: -# Setup detectron2 logger -from detectron2.utils.logger import setup_logger -setup_logger() - -import gc -import os -import time - -import detectron2.utils.comm as comm - -# import some common libraries -import numpy as np -import torch - -# import some common detectron2 utilities -from detectron2.config import LazyConfig, get_cfg -from detectron2.engine import launch - -from deepdisc.data_format.augment_image import train_augs -from deepdisc.data_format.image_readers import DC2ImageReader -from deepdisc.data_format.register_data import register_data_set -from deepdisc.model.loaders import DictMapper, return_test_loader, return_train_loader -from deepdisc.model.models import return_lazy_model -from deepdisc.training.trainers import ( - return_evallosshook, - return_lazy_trainer, - return_optimizer, - return_savehook, - return_schedulerhook, -) -from deepdisc.utils.parse_arguments import make_training_arg_parser - - -def main(train_head, args): - # Hack if you get SSL certificate error - import ssl - ssl._create_default_https_context = ssl._create_unverified_context - - # Handle args - output_dir = args.output_dir - output_name = args.run_name - dirpath = args.data_dir # Path to dataset - scheme = args.scheme - alphas = args.alphas - modname = args.modname - datatype = args.dtype - if datatype == 8: - dtype = np.uint8 - elif datatype == 16: - dtype = np.int16 - - # Get file locations - trainfile = dirpath + "single_test.json" - testfile = dirpath + "single_test.json" - if modname == "swin": - cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep_DC2.py" - elif modname == "mvitv2": - cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_mvitv2_b_in21k_100ep_DC2.py" - # Vitdet not currently available (cuda issues) so we're tabling it for now - #elif modname == "vitdet": - # cfgfile = "/home/shared/hsc/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_b_100ep.py" - - # Load the config - cfg = LazyConfig.load(cfgfile) - - # Register the data sets - astrotrain_metadata = register_data_set( - cfg.DATASETS.TRAIN, trainfile, thing_classes=cfg.metadata.classes - ) - astroval_metadata = register_data_set( - cfg.DATASETS.TEST, testfile, thing_classes=cfg.metadata.classes - ) - - # Set the output directory - cfg.OUTPUT_DIR = output_dir - os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) - - # Iterations for 15, 25, 35, 50 epochs - # TODOLIV could this stuff be moved to a config too? 
- epoch = int(args.tl / cfg.dataloader.train.total_batch_size) - e1 = 20 - e2 = epoch * 10 - e3 = epoch * 20 - efinal = epoch * 35 - - val_per = 5 - - if train_head: - cfg.train.init_checkpoint = None # or initwfile, the path to your model - - model = return_lazy_model(cfg) - - cfg.optimizer.params.model = model - cfg.optimizer.lr = 0.001 - - cfg.SOLVER.STEPS = [] # do not decay learning rate for retraining - cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR" - cfg.SOLVER.WARMUP_ITERS = 0 - cfg.SOLVER.MAX_ITER = e1 # for DefaultTrainer - - # optimizer = instantiate(cfg.optimizer) - optimizer = return_optimizer(cfg) - - def dc2_key_mapper(dataset_dict): - filename = dataset_dict["filename"] - return filename - - IR = DC2ImageReader(norm=args.norm) - mapper = DictMapper(IR, dc2_key_mapper, train_augs).map_data - loader = return_train_loader(cfg, mapper) - test_mapper = DictMapper(IR, dc2_key_mapper).map_data - test_loader = return_test_loader(cfg, test_mapper) - - saveHook = return_savehook(output_name) - lossHook = return_evallosshook(val_per, model, test_loader) - schedulerHook = return_schedulerhook(optimizer) - hookList = [lossHook, schedulerHook, saveHook] - - trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList) - - trainer.set_period(5) - trainer.train(0, 20) - if comm.is_main_process(): - np.save(output_dir + output_name + "_losses", trainer.lossList) - np.save(output_dir + output_name + "_val_losses", trainer.vallossList) - return - - -if __name__ == "__main__": - args = make_training_arg_parser().parse_args() - print("Command Line Args:", args) - - print("Training head layers") - train_head = True - t0 = time.time() - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=( - train_head, - args, - ), - ) - - torch.cuda.empty_cache() - gc.collect() - - print(f"Took {time.time()-t0} seconds") - \ No newline at end of file diff --git a/solo_run_scripts/solo_test_run_transformers_DC2_redshift.py b/solo_run_scripts/solo_test_run_transformers_DC2_redshift.py deleted file mode 100644 index ea83fca..0000000 --- a/solo_run_scripts/solo_test_run_transformers_DC2_redshift.py +++ /dev/null @@ -1,170 +0,0 @@ -""" Training script for LazyConfig models. - -This uses the new "solo config" in which the previous yaml-style config -(a Detectron CfgNode type called cfg_loader) is now bundled into the -LazyConfig type cfg. 
-""" - -try: - # ignore ShapelyDeprecationWarning from fvcore - import warnings - from shapely.errors import ShapelyDeprecationWarning - warnings.filterwarnings("ignore", category=sShapelyDeprecationWarning) -except: - pass -warnings.filterwarnings("ignore", category=RuntimeWarning) -warnings.filterwarnings("ignore", category=UserWarning) - -# Some basic setup: -# Setup detectron2 logger -from detectron2.utils.logger import setup_logger -setup_logger() - -import gc -import os -import time - -import detectron2.utils.comm as comm - -# import some common libraries -import numpy as np -import torch - -# import some common detectron2 utilities -from detectron2.config import LazyConfig, get_cfg -from detectron2.engine import launch - -from deepdisc.data_format.augment_image import train_augs -from deepdisc.data_format.image_readers import DC2ImageReader -from deepdisc.data_format.register_data import register_data_set -from deepdisc.model.loaders import RedshiftDictMapper, return_test_loader, return_train_loader -from deepdisc.model.models import RedshiftPDFCasROIHeads, return_lazy_model -from deepdisc.training.trainers import ( - return_evallosshook, - return_lazy_trainer, - return_optimizer, - return_savehook, - return_schedulerhook, -) -from deepdisc.utils.parse_arguments import make_training_arg_parser - - -def main(train_head, args): - # Hack if you get SSL certificate error - import ssl - ssl._create_default_https_context = ssl._create_unverified_context - - # Handle args - output_dir = args.output_dir - output_name = args.run_name - dirpath = args.data_dir # Path to dataset - scheme = args.scheme - alphas = args.alphas - modname = args.modname - datatype = args.dtype - if datatype == 8: - dtype = np.uint8 - elif datatype == 16: - dtype = np.int16 - - # Get file locations - trainfile = dirpath + "single_test.json" - testfile = dirpath + "single_test.json" - if modname == "swin": - cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep_DC2.py" - elif modname == "mvitv2": - cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_mvitv2_b_in21k_100ep_DC2.py" - # Vitdet not currently available (cuda issues) so we're tabling it for now - #elif modname == "vitdet": - # cfgfile = "/home/shared/hsc/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_b_100ep.py" - - # Load the config - cfg = LazyConfig.load(cfgfile) - - # Register the data sets - astrotrain_metadata = register_data_set( - cfg.DATASETS.TRAIN, trainfile, thing_classes=cfg.metadata.classes - ) - astroval_metadata = register_data_set( - cfg.DATASETS.TEST, testfile, thing_classes=cfg.metadata.classes - ) - - # Set the output directory - cfg.OUTPUT_DIR = output_dir - os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) - - # Iterations for 15, 25, 35, 50 epochs - # TODOLIV could this stuff be moved to a config too? 
- epoch = int(args.tl / cfg.dataloader.train.total_batch_size) - e1 = 20 - e2 = epoch * 10 - e3 = epoch * 20 - efinal = epoch * 35 - - val_per = 5 - - if train_head: - cfg.train.init_checkpoint = None # or initwfile, the path to your model - - model = return_lazy_model(cfg) - - cfg.optimizer.params.model = model - cfg.optimizer.lr = 0.001 - - cfg.SOLVER.STEPS = [] # do not decay learning rate for retraining - cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR" - cfg.SOLVER.WARMUP_ITERS = 0 - cfg.SOLVER.MAX_ITER = e1 # for DefaultTrainer - - # optimizer = instantiate(cfg.optimizer) - optimizer = return_optimizer(cfg) - - def dc2_key_mapper(dataset_dict): - filename = dataset_dict["filename"] - return filename - - IR = DC2ImageReader() - mapper = RedshiftDictMapper(IR, dc2_key_mapper, train_augs).map_data - loader = return_train_loader(cfg, mapper) - test_mapper = RedshiftDictMapper(IR, dc2_key_mapper).map_data - test_loader = return_test_loader(cfg, test_mapper) - - saveHook = return_savehook(output_name) - lossHook = return_evallosshook(val_per, model, test_loader) - schedulerHook = return_schedulerhook(optimizer) - hookList = [lossHook, schedulerHook, saveHook] - - trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList) - - trainer.set_period(5) - trainer.train(0, 20) - if comm.is_main_process(): - np.save(output_dir + output_name + "_losses", trainer.lossList) - np.save(output_dir + output_name + "_val_losses", trainer.vallossList) - return - - -if __name__ == "__main__": - args = make_training_arg_parser().parse_args() - print("Command Line Args:", args) - - print("Training head layers") - train_head = True - t0 = time.time() - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=( - train_head, - args, - ), - ) - - torch.cuda.empty_cache() - gc.collect() - - print(f"Took {time.time()-t0} seconds") - diff --git a/solo_run_transformers_DC2_redshift.py b/solo_run_transformers_DC2_redshift.py deleted file mode 100644 index 95a9d8d..0000000 --- a/solo_run_transformers_DC2_redshift.py +++ /dev/null @@ -1,197 +0,0 @@ -""" Training script for LazyConfig models. - -This uses the new "solo config" in which the previous yaml-style config -(a Detectron CfgNode type called cfg_loader) is now bundled into the -LazyConfig type cfg. 
-""" - -try: - # ignore ShapelyDeprecationWarning from fvcore - import warnings - from shapely.errors import ShapelyDeprecationWarning - - warnings.filterwarnings("ignore", category=sShapelyDeprecationWarning) -except: - pass -warnings.filterwarnings("ignore", category=RuntimeWarning) -warnings.filterwarnings("ignore", category=UserWarning) - -# Some basic setup: -# Setup detectron2 logger -from detectron2.utils.logger import setup_logger - -setup_logger() - -import gc -import os -import time - -import detectron2.utils.comm as comm - -# import some common libraries -import numpy as np -import torch - -# import some common detectron2 utilities -from detectron2.config import LazyConfig, get_cfg -from detectron2.engine import launch -from detectron2.engine.defaults import create_ddp_model -from detectron2.config import instantiate - -from deepdisc.data_format.augment_image import train_augs -from deepdisc.data_format.image_readers import DC2ImageReader -from deepdisc.data_format.register_data import register_data_set -from deepdisc.model.loaders import RedshiftDictMapper, return_test_loader, return_train_loader -from deepdisc.model.models import RedshiftPDFCasROIHeads, return_lazy_model -from deepdisc.training.trainers import ( - return_evallosshook, - return_lazy_trainer, - return_optimizer, - return_savehook, - return_schedulerhook, -) -from deepdisc.utils.parse_arguments import make_training_arg_parser - - -def main(train_head, args): - # Hack if you get SSL certificate error - import ssl - - ssl._create_default_https_context = ssl._create_unverified_context - - # Handle args - output_dir = args.output_dir - output_name = args.run_name - dirpath = args.data_dir # Path to dataset - scheme = args.scheme - alphas = args.alphas - modname = args.modname - datatype = args.dtype - if datatype == 8: - dtype = np.uint8 - elif datatype == 16: - dtype = np.int16 - - # Get file locations - #trainfile = dirpath + "train_scarlet_public.json" - #testfile = dirpath + "test_scarlet_public.json" - trainfile = dirpath + "single_test.json" - testfile = dirpath + "single_test.json" - - if modname == "swin": - cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep_DC2_redshift.py" - elif modname == "mvitv2": - cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_mvitv2_b_in21k_100ep_DC2.py" - # Vitdet not currently available (cuda issues) so we're tabling it for now - # elif modname == "vitdet": - # cfgfile = "/home/shared/hsc/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_b_100ep.py" - - # Load the config - cfg = LazyConfig.load(cfgfile) - - # Register the data sets - astrotrain_metadata = register_data_set(cfg.DATASETS.TRAIN, trainfile, thing_classes=cfg.metadata.classes) - astroval_metadata = register_data_set(cfg.DATASETS.TEST, testfile, thing_classes=cfg.metadata.classes) - - # Set the output directory - cfg.OUTPUT_DIR = output_dir - os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) - - # Iterations for 15, 25, 35, 50 epochs - # TODOLIV could this stuff be moved to a config too? 
- #epoch = int(1000 / cfg.dataloader.train.total_batch_size) - epoch = 500 - e1 = epoch - e2 = epoch * 10 - e3 = epoch * 20 - efinal = epoch * 35 - - val_per = epoch - - - if train_head: - # cfg.train.init_checkpoint = None # or initwfile, the path to your model - cfg.train.init_checkpoint = "/home/shared/hsc/detectron2/projects/ViTDet/model_final_246a82.pkl" - # model = return_lazy_model(cfg) - - model = instantiate(cfg.model) - - #for param in model.parameters(): - # param.requires_grad = False - ## Phase 1: Unfreeze only the roi_heads - #for param in model.roi_heads.parameters(): - # param.requires_grad = True - ## Phase 2: Unfreeze region proposal generator with reduced lr - #for param in model.proposal_generator.parameters(): - # param.requires_grad = True - - model.to(cfg.train.device) - model = create_ddp_model(model, **cfg.train.ddp) - - cfg.optimizer.params.model = model - cfg.optimizer.lr = 0.001 - - cfg.SOLVER.STEPS = [] # do not decay learning rate for retraining - cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR" - cfg.SOLVER.WARMUP_ITERS = 0 - cfg.SOLVER.MAX_ITER = e1 # for DefaultTrainer - - # optimizer = instantiate(cfg.optimizer) - optimizer = return_optimizer(cfg) - - def dc2_key_mapper(dataset_dict): - filename = dataset_dict["filename"] - return filename - - #def dc2_key_mapper(dataset_dict): - # filename = dataset_dict["filename"] - # print(filename) - # base = filename.split(".")[0].split("/")[-1] - # dirpath = "/home/g4merz/DC2/nersc_data/scarlet_data" - # fn = os.path.join(dirpath, base) + ".npy" - # return fn - - IR = DC2ImageReader() - mapper = RedshiftDictMapper(IR, dc2_key_mapper, train_augs).map_data - loader = return_train_loader(cfg, mapper) - test_mapper = RedshiftDictMapper(IR, dc2_key_mapper).map_data - test_loader = return_test_loader(cfg, test_mapper) - - saveHook = return_savehook(output_name) - lossHook = return_evallosshook(val_per, model, test_loader) - schedulerHook = return_schedulerhook(optimizer) - # hookList = [lossHook, schedulerHook, saveHook] - hookList = [schedulerHook, saveHook] - trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList) - - trainer.set_period(epoch // 2) - trainer.train(0, e1) - if comm.is_main_process(): - np.save(output_dir + output_name + "_losses", trainer.lossList) - np.save(output_dir + output_name + "_val_losses", trainer.vallossList) - return - - -if __name__ == "__main__": - args = make_training_arg_parser().parse_args() - print("Command Line Args:", args) - - print("Training head layers") - train_head = True - t0 = time.time() - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=( - train_head, - args, - ), - ) - - torch.cuda.empty_cache() - gc.collect() - - print(f"Took {time.time()-t0} seconds") diff --git a/solo_test_eval_DC2_redshift.py b/solo_test_eval_DC2_redshift.py deleted file mode 100644 index 78fe4fd..0000000 --- a/solo_test_eval_DC2_redshift.py +++ /dev/null @@ -1,188 +0,0 @@ -""" -This code will read in a trained model and output the classes for predicted objects matched to the ground truth - -""" -import logging -import os -import time - -import numpy as np -import deepdisc.astrodet.astrodet as toolkit - -from deepdisc.data_format.file_io import get_data_from_json -from deepdisc.data_format.image_readers import DC2ImageReader -from deepdisc.inference.match_objects import get_matched_object_classes, get_matched_z_pdfs, run_batched_match_class, run_batched_match_redshift -from deepdisc.inference.predictors 
import return_predictor_transformer -from deepdisc.utils.parse_arguments import dtype_from_args, make_inference_arg_parser -from deepdisc.model.loaders import RedshiftDictMapperEval, return_test_loader, return_train_loader - - -from detectron2 import model_zoo -from detectron2.config import LazyConfig -from detectron2.data import MetadataCatalog -from detectron2.utils.logger import setup_logger -import detectron2.data as d2data - -from pathlib import Path -from detectron2.engine import launch - -setup_logger() -logger = logging.getLogger(__name__) -import torch.distributed as dist - -# Inference should use the config with parameters that are used in training -# cfg now already contains everything we've set previously. We changed it a little bit for inference: - - -import torch.distributed as dist - - -def gather_predictions(array, array_list=None, root=0, group=None): - """ - Sends tensor to root process, which store it in tensor_list. - """ - - rank = dist.get_rank() - if group is None: - group = dist.group.WORLD - if rank == root: - assert(array_list is not None) - dist.gather_object(array, object_gather_list=array_list, group=group) - else: - dist.gather_object(array, dst=root, group=group) - - - -def main(args): - size = args.num_gpus * args.num_machines - - # --------- Handle args - roi_thresh = args.roi_thresh - run_name = args.run_name - testfile = args.testfile - savedir = args.savedir - Path(savedir).mkdir(parents=True, exist_ok=True) - output_dir = args.output_dir - dtype=dtype_from_args(args.datatype) - - # --------- Load data - dataset_names = ["test"] - t0 = time.time() - dataset_dicts = {} - for i, d in enumerate(dataset_names): - dataset_dicts[d] = get_data_from_json(testfile) - print("Took ", time.time() - t0, "seconds to load samples") - - # Local vars/metadata - #classes = ["star", "galaxy"] - bb = args.run_name.split("_")[0] # backbone - - # --------- Start config stuff - - cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep_DC2_redshift.py" - cfg = LazyConfig.load(cfgfile) - - # --------- Setting a bunch of config stuff - - cfg.train.init_checkpoint = os.path.join(output_dir, run_name) - - # --------- Now we case predictor on model type (the second case has way different config vals it appears) - - cfg.OUTPUT_DIR = output_dir - if bb in ['Swin','MViTv2']: - predictor= return_predictor_transformer(cfg) - else: - cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep_DC2.py" - predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=2, roi_thresh=roi_thresh) - - # --------- - def dc2_key_mapper(dataset_dict): - filename = dataset_dict["filename"] - return filename - - - #def dc2_key_mapper(dataset_dict): - # filename = dataset_dict["filename"] - # base = filename.split(".")[0].split("/")[-1] - # dirpath = "/home/g4merz/DC2/nersc_data/scarlet_data" - # fn = os.path.join(dirpath, base) + ".npy" - # return fn - - IR = DC2ImageReader() - - - mapper = RedshiftDictMapperEval(IR, dc2_key_mapper).map_data - - #loader = return_test_loader(cfg, mapper) - loader = d2data.build_detection_test_loader( - dataset_dicts['test'], mapper=mapper, batch_size=1 - ) - - - # --------- Do the thing - t0 = time.time() - print("Matching objects") - #true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, dc2_key_mapper, predictor) - #true_zs, pred_pdfs = get_matched_z_pdfs(dataset_dicts["test"], IR, dc2_key_mapper, predictor) - - true_classes, pred_classes = 
run_batched_match_class(loader, predictor) - - classes = np.array([true_classes, pred_classes]) - - - true_zs, pred_pdfs, ids = run_batched_match_redshift(loader, predictor, ids=True) - - if size==1: - np.save(os.path.join(args.savedir,'predicted_pdfs.npy'),pred_pdfs) - np.save(os.path.join(args.savedir,'true_zs.npy'),true_zs) - np.save(os.path.join(args.savedir,'ids.npy'),ids) - - return - - - else: - #size is the world size - true_zlist = [None for _ in range(size)] - pred_zlist = [None for _ in range(size)] - id_list = [None for _ in range(size)] - - if dist.get_rank() == 0: - gather_predictions(true_zs, true_zlist) - gather_predictions(pred_pdfs, pred_zlist) - gather_predictions(ids, id_list) - - else: - gather_predictions(true_zs) - gather_predictions(pred_pdfs) - gather_predictions(ids) - - - if dist.get_rank() == 0: - #pred_zlist = np.concatenate([pred_list for pred_list in pred_zlist]) - np.save(os.path.join(args.savedir,'predicted_pdfs.npy'),pred_zlist) - np.save(os.path.join(args.savedir,'true_zs.npy'),true_zlist) - np.save(os.path.join(args.savedir,'ids.npy'),id_list) - - return - - -if __name__ == "__main__": - args = make_inference_arg_parser().parse_args() - - print('Inference') - train_head = True - t0 = time.time() - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=( - args, - ), - ) - - - print(f"Took {time.time()-t0} seconds") - \ No newline at end of file diff --git a/src/deepdisc/astrodet/astrodet.py b/src/deepdisc/astrodet/astrodet.py index d834b4e..49168e7 100644 --- a/src/deepdisc/astrodet/astrodet.py +++ b/src/deepdisc/astrodet/astrodet.py @@ -99,6 +99,7 @@ from tabulate import tabulate from torch.nn.parallel import DistributedDataParallel +from deepdisc.astrodet import detectron as detectron_addons def set_mpl_style(): """Function to set MPL style""" diff --git a/src/deepdisc/utils/parse_arguments.py b/src/deepdisc/utils/parse_arguments.py index 50675d5..3b7a1ea 100644 --- a/src/deepdisc/utils/parse_arguments.py +++ b/src/deepdisc/utils/parse_arguments.py @@ -42,6 +42,9 @@ def make_inference_arg_parser(): "https://pytorch.org/docs/stable/distributed.html for details.", ) + # To differentiate the kind of run + parser.add_argument("--use-dc2", default=False, action="store_true") + parser.add_argument("--use-redshift", default=False, action="store_true") return parser @@ -106,6 +109,10 @@ def make_training_arg_parser(epilog=None): "See documentation of `DefaultTrainer.resume_or_load()` for what it means.", ) run_args.add_argument("--run-name", type=str, default="Swin_test", help="output name for run") + + # To differentiate the kind of run + run_args.add_argument("--use-dc2", default=False, action="store_true") + run_args.add_argument("--use-redshift", default=False, action="store_true") # Add arguments for the machine specifications machine_args = parser.add_argument_group("Machine arguments") diff --git a/test_eval_model.py b/test_eval_model.py deleted file mode 100644 index e496371..0000000 --- a/test_eval_model.py +++ /dev/null @@ -1,215 +0,0 @@ -# Some basic setup: - -# Setup detectron2 logger -from detectron2.utils.logger import setup_logger - -setup_logger() - -import logging -import os -import time - - -# from google.colab.patches import cv2_imshow - -# import some common libraries -import numpy as np - -# import some common detectron2 utilities -from detectron2 import model_zoo -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog - -import 
deepdisc.astrodet.astrodet as toolkit - -logger = logging.getLogger(__name__) -from pathlib import Path - - -from deepdisc.data_format.file_io import get_data_from_json -from deepdisc.data_format.image_readers import HSCImageReader -from deepdisc.inference.match_objects import get_matched_object_classes -from deepdisc.inference.predictors import return_predictor_transformer -from deepdisc.utils.parse_arguments import dtype_from_args, make_inference_arg_parser - -""" -This code will read in a trained model and output the classes for predicted objects matched to the ground truth - -""" - -args = make_inference_arg_parser().parse_args() - -output_dir = args.output_dir -roi_thresh = args.roi_thresh -savedir = args.savedir -print(savedir) -Path(savedir).mkdir(parents=True, exist_ok=True) -run_name = args.run_name - - -testfile = args.testfile - -classes = ["star", "galaxy"] - -dataset_names = ["test"] -datadir = "/home/shared/hsc/HSC/HSC_DR3/data/" -t0 = time.time() -dataset_dicts = {} -for i, d in enumerate(dataset_names): - dataset_dicts[d] = get_data_from_json(testfile) - - -print("Took ", time.time() - t0, "seconds to load samples") - -# Inference should use the config with parameters that are used in training -# cfg now already contains everything we've set previously. We changed it a little bit for inference: - - -def return_predictor( - cfgfile, run_name, nc=1, output_dir="/home/shared/hsc/HSC/HSC_DR3/models/noclass/", roi_thresh=0.5 -): - """ - This function returns a trained model and its config file. - Used for models that have yacs config files - - Parameters - ---------- - cfgfile: str - A path to a model config file, provided by the detectron2 repo - run_name: str - Prefix used for the name of the saved model - nc: int - Number of classes used in the model - output_dir: str - THe directory to save metric outputs - roi_thresh: float - Hyperparamter that functions as a detection sensitivity level - - """ - - cfg = get_cfg() - cfg.merge_from_file(model_zoo.get_config_file(cfgfile)) # Get model structure - cfg.DATASETS.TRAIN = "astro_train" # Register Metadata - cfg.DATASETS.TEST = ("astro_test",) # Config calls this TEST, but it should be the val dataset - cfg.DATALOADER.NUM_WORKERS = 1 - cfg.SOLVER.IMS_PER_BATCH = ( - 4 # this is images per iteration. 
1 epoch is len(images)/(ims_per_batch iterations*num_gpus) - ) - cfg.SOLVER.BASE_LR = 0.001 - cfg.SOLVER.STEPS = [] # do not decay learning rate for retraining - cfg.SOLVER.MAX_ITER = 100 # for DefaultTrainer - cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = ( - 512 # faster, and good enough for this toy dataset (default: 512) - ) - cfg.MODEL.ROI_HEADS.NUM_CLASSES = nc - cfg.OUTPUT_DIR = output_dir - cfg.TEST.DETECTIONS_PER_IMAGE = 1000 - cfg.INPUT.MIN_SIZE_TRAIN = 1025 - cfg.INPUT.MAX_SIZE_TRAIN = 1050 - - # Defaults - # PRE_NMS_TOPK_TEST: 6000 - # POST_NMS_TOPK_TEST: 1000 - # PRE_NMS_TOPK_TRAIN: 12000 - # POST_NMS_TOPK_TRAIN: 2000 - - cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 6000 - cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000 - - cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512 - cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[8, 16, 32, 64, 128]] - - cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, run_name) # path to the model we just trained - cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = roi_thresh # set a custom testing threshold - cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.3 - cfg.MODEL.ROI_BOX_HEAD.GAMMA = 1 - cfg.MODEL.ROI_BOX_HEAD.ALPHAS = None - - predictor = toolkit.AstroPredictor(cfg) - - return predictor, cfg - - -# Inference should use the config with parameters that are used in training -# cfg now already contains everything we've set previously. We changed it a little bit for inference: -from detectron2.config import LazyConfig - -bb = args.run_name.split("_")[0] -cfgfile = "./tests/deepdisc/test_data/configs/COCO/cascade_mask_rcnn_swin_b_in21k_50ep.py" - -cfg = LazyConfig.load(cfgfile) - -metadata = MetadataCatalog.get(cfg.dataloader.test.dataset.names) # to get labels from ids -classes = metadata.thing_classes - -cfg.model.proposal_generator.anchor_generator.sizes = [[8], [16], [32], [64], [128]] -cfg.dataloader.train.total_batch_size = 4 -cfg.model.roi_heads.num_classes = args.nc -cfg.model.roi_heads.batch_size_per_image = 512 - -for bp in cfg.model.roi_heads.box_predictors: - bp.test_score_thresh = roi_thresh -cfg.model.proposal_generator.pre_nms_topk = [6000, 6000] -cfg.model.proposal_generator.post_nms_topk = [6000, 6000] -cfg.model.proposal_generator.nms_thresh = 0.3 - - -for box_predictor in cfg.model.roi_heads.box_predictors: - box_predictor.test_topk_per_image = 1000 - box_predictor.test_score_thresh = roi_thresh - -cfg_loader = get_cfg() -cfg_loader.SOLVER.IMS_PER_BATCH = 4 -cfg_loader.DATASETS.TRAIN = "astro_train" # Register Metadata -cfg_loader.DATASETS.TEST = "astro_val" -cfg_loader.DATALOADER.NUM_WORKERS = 1 -cfg_loader.SOLVER.BASE_LR = 0.001 -cfg_loader.OUTPUT_DIR = output_dir - -cfg_loader.TEST.DETECTIONS_PER_IMAGE = 1000 - -cfg_loader.OUTPUT_DIR = output_dir - -cfg.train.init_checkpoint = os.path.join(cfg_loader.OUTPUT_DIR, run_name) - -# predictor = return_predictor_transformer(cfg,cfg_loader) - -output_dir = args.output_dir - -roi_thresh=args.roi_thresh -run_name=args.run_name -dtype=dtype_from_args(args.datatype) - -if bb in ['Swin','MViTv2']: - predictor= return_predictor_transformer(cfg,cfg_loader) -else: - predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=2, roi_thresh=roi_thresh) - - -def hsc_key_mapper(dataset_dict): - filenames = [ - dataset_dict["filename_G"], - dataset_dict["filename_R"], - dataset_dict["filename_I"], - ] - return filenames - - -IR = HSCImageReader(norm=args.norm) - - -t0 = time.time() - - -print("Matching objects") -true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, hsc_key_mapper, predictor) -classes = 
np.array([true_classes, pred_classes]) - -savename = f"{bb}_test_matched_classes.npy" -np.save(os.path.join(args.savedir, savename), classes) - -print("Took ", time.time() - t0, " seconds") - -print(classes) - -t0 = time.time() diff --git a/test_eval_model_DC2.py b/test_eval_model_DC2.py deleted file mode 100644 index 2c4fa4b..0000000 --- a/test_eval_model_DC2.py +++ /dev/null @@ -1,223 +0,0 @@ -# Some basic setup: - -# Setup detectron2 logger -from detectron2.utils.logger import setup_logger - -setup_logger() - -import logging -import os -import time - - -# from google.colab.patches import cv2_imshow - -# import some common libraries -import numpy as np - -# import some common detectron2 utilities -from detectron2 import model_zoo -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog - -import deepdisc.astrodet.astrodet as toolkit - -logger = logging.getLogger(__name__) -from pathlib import Path - - -from deepdisc.data_format.file_io import get_data_from_json -from deepdisc.data_format.image_readers import DC2ImageReader -from deepdisc.inference.match_objects import get_matched_object_classes -from deepdisc.inference.predictors import return_predictor_transformer -from deepdisc.utils.parse_arguments import make_inference_arg_parser - -""" -This code will read in a trained model and output the classes for predicted objects matched to the ground truth - -""" - -args = make_inference_arg_parser().parse_args() - -output_dir = args.output_dir -roi_thresh = args.roi_thresh -savedir = args.savedir -print(savedir) -Path(savedir).mkdir(parents=True, exist_ok=True) -run_name = args.run_name - - -testfile = args.testfile - -classes = ["object"] - -dataset_names = ["test"] -datadir = "/home/shared/hsc/HSC/HSC_DR3/data/" -t0 = time.time() -dataset_dicts = {} -for i, d in enumerate(dataset_names): - dataset_dicts[d] = get_data_from_json(testfile) - - -print("Took ", time.time() - t0, "seconds to load samples") - -# Inference should use the config with parameters that are used in training -# cfg now already contains everything we've set previously. We changed it a little bit for inference: - - -def return_predictor( - cfgfile, run_name, nc=1, output_dir="/home/shared/hsc/HSC/HSC_DR3/models/noclass/", roi_thresh=0.5 -): - """ - This function returns a trained model and its config file. - Used for models that have yacs config files - - Parameters - ---------- - cfgfile: str - A path to a model config file, provided by the detectron2 repo - run_name: str - Prefix used for the name of the saved model - nc: int - Number of classes used in the model - output_dir: str - THe directory to save metric outputs - roi_thresh: float - Hyperparamter that functions as a detection sensitivity level - - """ - - cfg = get_cfg() - cfg.merge_from_file(model_zoo.get_config_file(cfgfile)) # Get model structure - cfg.DATASETS.TRAIN = "astro_train" # Register Metadata - cfg.DATASETS.TEST = ("astro_test",) # Config calls this TEST, but it should be the val dataset - cfg.DATALOADER.NUM_WORKERS = 1 - cfg.SOLVER.IMS_PER_BATCH = ( - 4 # this is images per iteration. 
1 epoch is len(images)/(ims_per_batch iterations*num_gpus) - ) - cfg.SOLVER.BASE_LR = 0.001 - cfg.SOLVER.STEPS = [] # do not decay learning rate for retraining - cfg.SOLVER.MAX_ITER = 100 # for DefaultTrainer - cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = ( - 512 # faster, and good enough for this toy dataset (default: 512) - ) - cfg.MODEL.ROI_HEADS.NUM_CLASSES = nc - cfg.OUTPUT_DIR = output_dir - cfg.TEST.DETECTIONS_PER_IMAGE = 1000 - cfg.INPUT.MIN_SIZE_TRAIN = 1025 - cfg.INPUT.MAX_SIZE_TRAIN = 1050 - - # Defaults - # PRE_NMS_TOPK_TEST: 6000 - # POST_NMS_TOPK_TEST: 1000 - # PRE_NMS_TOPK_TRAIN: 12000 - # POST_NMS_TOPK_TRAIN: 2000 - - cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 6000 - cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000 - - cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512 - cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[8, 16, 32, 64, 128]] - - cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, run_name) # path to the model we just trained - cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = roi_thresh # set a custom testing threshold - cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.3 - cfg.MODEL.ROI_BOX_HEAD.GAMMA = 1 - cfg.MODEL.ROI_BOX_HEAD.ALPHAS = None - - predictor = toolkit.AstroPredictor(cfg) - - return predictor, cfg - - -# Inference should use the config with parameters that are used in training -# cfg now already contains everything we've set previously. We changed it a little bit for inference: -from detectron2.config import LazyConfig - -bb = args.run_name.split("_")[0] -cfgfile = "./tests/deepdisc/test_data/configs/COCO/cascade_mask_rcnn_swin_b_in21k_50ep.py" - -cfg = LazyConfig.load(cfgfile) - -metadata = MetadataCatalog.get(cfg.dataloader.test.dataset.names) # to get labels from ids -classes = metadata.thing_classes - -cfg.model.proposal_generator.anchor_generator.sizes = [[8], [16], [32], [64], [128]] -cfg.dataloader.train.total_batch_size = 4 -cfg.model.roi_heads.num_classes = args.nc -cfg.model.roi_heads.batch_size_per_image = 512 -cfg.model.backbone.bottom_up.in_chans = 6 -cfg.model.pixel_mean = [0.05381286, 0.04986344, 0.07526361, 0.10420945, 0.14229655, 0.21245764] -cfg.model.pixel_std = [2.9318833, 1.8443471, 2.581817, 3.5950038, 4.5809164, 7.302009] -# cfg.model.roi_heads.num_components=5 -# cfg.model.roi_heads._target_ = RedshiftPDFCasROIHeads - - -for bp in cfg.model.roi_heads.box_predictors: - bp.test_score_thresh = roi_thresh -cfg.model.proposal_generator.pre_nms_topk = [6000, 6000] -cfg.model.proposal_generator.post_nms_topk = [6000, 6000] -cfg.model.proposal_generator.nms_thresh = 0.3 - - -for box_predictor in cfg.model.roi_heads.box_predictors: - box_predictor.test_topk_per_image = 1000 - box_predictor.test_score_thresh = roi_thresh - -cfg_loader = get_cfg() -cfg_loader.SOLVER.IMS_PER_BATCH = 4 -cfg_loader.DATASETS.TRAIN = "astro_train" # Register Metadata -cfg_loader.DATASETS.TEST = "astro_val" -cfg_loader.DATALOADER.NUM_WORKERS = 1 -cfg_loader.SOLVER.BASE_LR = 0.001 -cfg_loader.OUTPUT_DIR = output_dir - -cfg_loader.TEST.DETECTIONS_PER_IMAGE = 1000 - -cfg_loader.OUTPUT_DIR = output_dir - -cfg.train.init_checkpoint = os.path.join(cfg_loader.OUTPUT_DIR, run_name) - -# predictor = return_predictor_transformer(cfg,cfg_loader) - -output_dir = args.output_dir -roi_thresh = args.roi_thresh -run_name = args.run_name -dt = args.datatype -if dt == 16: - dtype = np.int16 -elif dt == 8: - dtype = np.uint8 - - -if bb in ["Swin", "MViTv2"]: - predictor = return_predictor_transformer(cfg, cfg_loader) -else: - predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=2, roi_thresh=roi_thresh) - - 
-def dc2_key_mapper(dataset_dict): - filename = dataset_dict["filename"] - return filename - - -IR = DC2ImageReader(norm=args.norm) - - -t0 = time.time() - - -print("Matching objects") -true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, dc2_key_mapper, predictor) -# true_zs, pred_pdfs = get_matched_z_pdfs(dataset_dicts['test'], IR, dc2_key_mapper, predictor) - -classes = np.array([true_classes, pred_classes]) - -savename = f"{bb}_test_matched_classes.npy" -np.save(os.path.join(args.savedir, savename), classes) - -print("Took ", time.time() - t0, " seconds") - -print(classes) - -t0 = time.time() diff --git a/test_eval_model_DC2_redshift.py b/test_eval_model_DC2_redshift.py deleted file mode 100644 index cf9af41..0000000 --- a/test_eval_model_DC2_redshift.py +++ /dev/null @@ -1,224 +0,0 @@ -# Some basic setup: - -# Setup detectron2 logger -from detectron2.utils.logger import setup_logger - -setup_logger() - -import logging -import os -import time - - -# from google.colab.patches import cv2_imshow - -# import some common libraries -import numpy as np - -# import some common detectron2 utilities -from detectron2 import model_zoo -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog - -import deepdisc.astrodet.astrodet as toolkit - -logger = logging.getLogger(__name__) -from pathlib import Path - - -from deepdisc.data_format.file_io import get_data_from_json -from deepdisc.data_format.image_readers import DC2ImageReader -from deepdisc.inference.match_objects import get_matched_object_classes, get_matched_z_pdfs -from deepdisc.inference.predictors import return_predictor_transformer -from deepdisc.model.models import RedshiftPDFCasROIHeads -from deepdisc.utils.parse_arguments import make_inference_arg_parser - -""" -This code will read in a trained model and output the classes for predicted objects matched to the ground truth - -""" - -args = make_inference_arg_parser().parse_args() - -output_dir = args.output_dir -roi_thresh = args.roi_thresh -savedir = args.savedir -print(savedir) -Path(savedir).mkdir(parents=True, exist_ok=True) -run_name = args.run_name - - -testfile = args.testfile - -classes = ["object"] - -dataset_names = ["test"] -datadir = "/home/shared/hsc/HSC/HSC_DR3/data/" -t0 = time.time() -dataset_dicts = {} -for i, d in enumerate(dataset_names): - dataset_dicts[d] = get_data_from_json(testfile) - - -print("Took ", time.time() - t0, "seconds to load samples") - -# Inference should use the config with parameters that are used in training -# cfg now already contains everything we've set previously. We changed it a little bit for inference: - - -def return_predictor( - cfgfile, run_name, nc=1, output_dir="/home/shared/hsc/HSC/HSC_DR3/models/noclass/", roi_thresh=0.5 -): - """ - This function returns a trained model and its config file. 
- Used for models that have yacs config files - - Parameters - ---------- - cfgfile: str - A path to a model config file, provided by the detectron2 repo - run_name: str - Prefix used for the name of the saved model - nc: int - Number of classes used in the model - output_dir: str - THe directory to save metric outputs - roi_thresh: float - Hyperparamter that functions as a detection sensitivity level - - """ - - cfg = get_cfg() - cfg.merge_from_file(model_zoo.get_config_file(cfgfile)) # Get model structure - cfg.DATASETS.TRAIN = "astro_train" # Register Metadata - cfg.DATASETS.TEST = ("astro_test",) # Config calls this TEST, but it should be the val dataset - cfg.DATALOADER.NUM_WORKERS = 1 - cfg.SOLVER.IMS_PER_BATCH = ( - 4 # this is images per iteration. 1 epoch is len(images)/(ims_per_batch iterations*num_gpus) - ) - cfg.SOLVER.BASE_LR = 0.001 - cfg.SOLVER.STEPS = [] # do not decay learning rate for retraining - cfg.SOLVER.MAX_ITER = 100 # for DefaultTrainer - cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = ( - 512 # faster, and good enough for this toy dataset (default: 512) - ) - cfg.MODEL.ROI_HEADS.NUM_CLASSES = nc - cfg.OUTPUT_DIR = output_dir - cfg.TEST.DETECTIONS_PER_IMAGE = 1000 - cfg.INPUT.MIN_SIZE_TRAIN = 1025 - cfg.INPUT.MAX_SIZE_TRAIN = 1050 - - # Defaults - # PRE_NMS_TOPK_TEST: 6000 - # POST_NMS_TOPK_TEST: 1000 - # PRE_NMS_TOPK_TRAIN: 12000 - # POST_NMS_TOPK_TRAIN: 2000 - - cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 6000 - cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000 - - cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512 - cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[8, 16, 32, 64, 128]] - - cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, run_name) # path to the model we just trained - cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = roi_thresh # set a custom testing threshold - cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.3 - cfg.MODEL.ROI_BOX_HEAD.GAMMA = 1 - cfg.MODEL.ROI_BOX_HEAD.ALPHAS = None - - predictor = toolkit.AstroPredictor(cfg) - - return predictor, cfg - - -# Inference should use the config with parameters that are used in training -# cfg now already contains everything we've set previously. 
We changed it a little bit for inference: -from detectron2.config import LazyConfig - -bb = args.run_name.split("_")[0] -cfgfile = "./tests/deepdisc/test_data/configs/COCO/cascade_mask_rcnn_swin_b_in21k_50ep.py" - -cfg = LazyConfig.load(cfgfile) - -metadata = MetadataCatalog.get(cfg.dataloader.test.dataset.names) # to get labels from ids -classes = metadata.thing_classes - -cfg.model.proposal_generator.anchor_generator.sizes = [[8], [16], [32], [64], [128]] -cfg.dataloader.train.total_batch_size = 4 -cfg.model.roi_heads.num_classes = args.nc -cfg.model.roi_heads.batch_size_per_image = 512 -cfg.model.backbone.bottom_up.in_chans = 6 -cfg.model.pixel_mean = [0.05381286, 0.04986344, 0.07526361, 0.10420945, 0.14229655, 0.21245764] -cfg.model.pixel_std = [2.9318833, 1.8443471, 2.581817, 3.5950038, 4.5809164, 7.302009] -cfg.model.roi_heads.num_components = 5 -cfg.model.roi_heads._target_ = RedshiftPDFCasROIHeads - - -for bp in cfg.model.roi_heads.box_predictors: - bp.test_score_thresh = roi_thresh -cfg.model.proposal_generator.pre_nms_topk = [6000, 6000] -cfg.model.proposal_generator.post_nms_topk = [6000, 6000] -cfg.model.proposal_generator.nms_thresh = 0.3 - - -for box_predictor in cfg.model.roi_heads.box_predictors: - box_predictor.test_topk_per_image = 1000 - box_predictor.test_score_thresh = roi_thresh - -cfg_loader = get_cfg() -cfg_loader.SOLVER.IMS_PER_BATCH = 4 -cfg_loader.DATASETS.TRAIN = "astro_train" # Register Metadata -cfg_loader.DATASETS.TEST = "astro_val" -cfg_loader.DATALOADER.NUM_WORKERS = 1 -cfg_loader.SOLVER.BASE_LR = 0.001 -cfg_loader.OUTPUT_DIR = output_dir - -cfg_loader.TEST.DETECTIONS_PER_IMAGE = 1000 - -cfg_loader.OUTPUT_DIR = output_dir - -cfg.train.init_checkpoint = os.path.join(cfg_loader.OUTPUT_DIR, run_name) - -# predictor = return_predictor_transformer(cfg,cfg_loader) - -output_dir = args.output_dir -roi_thresh = args.roi_thresh -run_name = args.run_name -dt = args.datatype -if dt == 16: - dtype = np.int16 -elif dt == 8: - dtype = np.uint8 - - -if bb in ["Swin", "MViTv2"]: - predictor = return_predictor_transformer(cfg, cfg_loader) -else: - predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=2, roi_thresh=roi_thresh) - -def dc2_key_mapper(dataset_dict): - filename = dataset_dict["filename"] - return filename - -IR = DC2ImageReader(norm=args.norm) - - -t0 = time.time() - - -print("Matching objects") -true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, dc2_key_mapper, predictor) -true_zs, pred_pdfs = get_matched_z_pdfs(dataset_dicts["test"], IR, dc2_key_mapper, predictor) - -print(true_zs, pred_pdfs) - -classes = np.array([true_classes, pred_classes]) - -savename = f"{bb}_test_matched_classes.npy" -np.save(os.path.join(args.savedir, savename), classes) - -print("Took ", time.time() - t0, " seconds") - -print(classes) - -t0 = time.time() diff --git a/test_run_transformers.py b/test_run_transformers.py deleted file mode 100644 index db60e1e..0000000 --- a/test_run_transformers.py +++ /dev/null @@ -1,197 +0,0 @@ -# Training script for LazyConfig models -try: - # ignore ShapelyDeprecationWarning from fvcore - import warnings - - from shapely.errors import ShapelyDeprecationWarning - - warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) - -except: - pass -warnings.filterwarnings("ignore", category=RuntimeWarning) -warnings.filterwarnings("ignore", category=UserWarning) - -# Some basic setup: -# Setup detectron2 logger -from detectron2.utils.logger import setup_logger - -setup_logger() - -import gc 
-import os -import time - -import detectron2.utils.comm as comm - -# import some common libraries -import numpy as np -import torch - -# import some common detectron2 utilities -from detectron2.config import LazyConfig, get_cfg -from detectron2.engine import launch - -from deepdisc.data_format.augment_image import hsc_test_augs, train_augs -from deepdisc.data_format.image_readers import HSCImageReader -from deepdisc.data_format.register_data import register_data_set -from deepdisc.model.loaders import DictMapper, return_test_loader, return_train_loader -from deepdisc.model.models import return_lazy_model -from deepdisc.training.trainers import ( - return_evallosshook, - return_lazy_trainer, - return_optimizer, - return_savehook, - return_schedulerhook, -) -from deepdisc.utils.parse_arguments import dtype_from_args, make_training_arg_parser - - -def main(train_head, args): - # Hack if you get SSL certificate error - import ssl - - ssl._create_default_https_context = ssl._create_unverified_context - - output_dir = args.output_dir - output_name = args.run_name - dirpath = args.data_dir # Path to dataset - scheme = args.scheme - alphas = args.alphas - modname = args.modname - if modname == "swin": - cfgfile = "./tests/deepdisc/test_data/configs/COCO/cascade_mask_rcnn_swin_b_in21k_50ep.py" - # initwfile = "/home/shared/hsc/detectron2/projects/ViTDet/model_final_246a82.pkl" - elif modname == "mvitv2": - cfgfile = "/home/shared/hsc/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_mvitv2_b_in21k_100ep.py" - # initwfile = "/home/shared/hsc/detectron2/projects/ViTDet/model_final_8c3da3.pkl" - - elif modname == "vitdet": - cfgfile = "/home/shared/hsc/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_b_100ep.py" - # initwfile = '/home/g4merz/deblend/detectron2/projects/ViTDet/model_final_435fa9.pkl' - # initwfile = "/home/shared/hsc/detectron2/projects/ViTDet/model_final_61ccd1.pkl" - - dtype = dtype_from_args(args.dtype) - trainfile = dirpath + "single_test.json" - testfile = dirpath + "single_test.json" - - classes = ["star", "galaxy"] - numclasses = len(classes) - - # Register the data sets and get the metadata. 
-    astrotrain_metadata = register_data_set("astro_train", trainfile, thing_classes=classes)
-    astroval_metadata = register_data_set("astro_val", testfile, thing_classes=classes)
-
-    cfg = LazyConfig.load(cfgfile)
-
-    # metadata = MetadataCatalog.get(cfg.dataloader.test.dataset.names) # to get labels from ids
-
-    bs = 1
-    cfg.model.proposal_generator.anchor_generator.sizes = [[8], [16], [32], [64], [128]]
-    cfg.dataloader.train.total_batch_size = bs
-    cfg.model.roi_heads.num_classes = numclasses
-    cfg.model.roi_heads.batch_size_per_image = 512
-
-    cfg_loader = get_cfg()
-    cfg_loader.SOLVER.IMS_PER_BATCH = bs
-    cfg_loader.DATASETS.TRAIN = "astro_train" # Register Metadata
-    cfg_loader.DATASETS.TEST = "astro_val"
-    # cfg_loader.DATALOADER.NUM_WORKERS = 0
-    cfg_loader.DATALOADER.PREFETCH_FACTOR = 2
-    cfg_loader.SOLVER.BASE_LR = 0.001
-    cfg_loader.OUTPUT_DIR = output_dir
-    os.makedirs(cfg_loader.OUTPUT_DIR, exist_ok=True)
-
-    cfg_loader.SOLVER.CLIP_GRADIENTS.ENABLED = True
-    # Type of gradient clipping, currently 2 values are supported:
-    # - "value": the absolute values of elements of each gradient are clipped
-    # - "norm": the norm of the gradient for each parameter is clipped, thus
-    # affecting all elements in the parameter
-    cfg_loader.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "norm"
-    # Maximum absolute value used for clipping gradients
-    # Floating point number p for the L-p norm to be used with the "norm"
-    # gradient clipping type; for L-inf, specify .inf
-    cfg_loader.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 5.0
-
-    # iteration milestones: e1 is a flat 20 iterations; e2/e3/efinal correspond to 10/20/35 epochs
-    epoch = int(args.tl / cfg.dataloader.train.total_batch_size)
-    e1 = 20
-    e2 = epoch * 10
-    e3 = epoch * 20
-    efinal = epoch * 35
-
-    val_per = 5
-
-    if train_head:
-        # cfg.train.init_checkpoint = initwfile # replace with the path where you have your model
-        cfg.train.init_checkpoint = None
-
-        # Step 1)
-
-        model = return_lazy_model(cfg)
-
-        cfg.optimizer.params.model = model
-        cfg.optimizer.lr = 0.001
-
-        cfg_loader.SOLVER.STEPS = [] # do not decay learning rate for retraining
-        cfg_loader.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
-        cfg_loader.SOLVER.WARMUP_ITERS = 0
-        cfg_loader.SOLVER.MAX_ITER = e1 # for DefaultTrainer
-
-        # optimizer = instantiate(cfg.optimizer)
-
-        optimizer = return_optimizer(cfg)
-
-        # key_mapper function should take a dataset_dict as input and output a key used by the image_reader function
-        def hsc_key_mapper(dataset_dict):
-            filenames = [
-                dataset_dict["filename_G"],
-                dataset_dict["filename_R"],
-                dataset_dict["filename_I"],
-            ]
-            return filenames
-
-        IR = HSCImageReader(norm=args.norm)
-        mapper = DictMapper(IR, hsc_key_mapper, train_augs).map_data
-        loader = return_train_loader(cfg_loader, mapper)
-        test_mapper = DictMapper(IR, hsc_key_mapper, hsc_test_augs).map_data
-        test_loader = return_test_loader(cfg_loader, test_mapper)
-
-        saveHook = return_savehook(output_name)
-        lossHook = return_evallosshook(val_per, model, test_loader)
-        schedulerHook = return_schedulerhook(optimizer)
-        hookList = [lossHook, schedulerHook, saveHook]
-
-        trainer = return_lazy_trainer(model, loader, optimizer, cfg, cfg_loader, hookList)
-
-        trainer.set_period(5)
-        trainer.train(0, 20)
-        if comm.is_main_process():
-            np.save(output_dir + output_name + "_losses", trainer.lossList)
-            np.save(output_dir + output_name + "_val_losses", trainer.vallossList)
-        return
-
-
-if __name__ == "__main__":
-    args = make_training_arg_parser().parse_args()
-    print("Command Line Args:", args)
-
-    print("Training head layers")
-    train_head = True
-    t0 = 
time.time() - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=( - train_head, - args, - ), - ) - - torch.cuda.empty_cache() - gc.collect() - - print(f"Took {time.time()-t0} seconds") diff --git a/test_run_transformers_DC2.py b/test_run_transformers_DC2.py deleted file mode 100644 index f875737..0000000 --- a/test_run_transformers_DC2.py +++ /dev/null @@ -1,202 +0,0 @@ -# Training script for LazyConfig models -try: - # ignore ShapelyDeprecationWarning from fvcore - import warnings - - from shapely.errors import ShapelyDeprecationWarning - - warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) - -except: - pass -warnings.filterwarnings("ignore", category=RuntimeWarning) -warnings.filterwarnings("ignore", category=UserWarning) - -# Some basic setup: -# Setup detectron2 logger -from detectron2.utils.logger import setup_logger - -setup_logger() - -import gc -import os -import time - -import detectron2.utils.comm as comm - -# import some common libraries -import numpy as np -import torch - -# import some common detectron2 utilities -from detectron2.config import LazyConfig, get_cfg -from detectron2.engine import launch - -from deepdisc.data_format.augment_image import train_augs -from deepdisc.data_format.image_readers import DC2ImageReader -from deepdisc.data_format.register_data import register_data_set -from deepdisc.model.loaders import DictMapper, return_test_loader, return_train_loader -from deepdisc.model.models import return_lazy_model -from deepdisc.training.trainers import ( - return_evallosshook, - return_lazy_trainer, - return_optimizer, - return_savehook, - return_schedulerhook, -) -from deepdisc.utils.parse_arguments import make_training_arg_parser - - -def main(train_head, args): - # Hack if you get SSL certificate error - import ssl - - ssl._create_default_https_context = ssl._create_unverified_context - - output_dir = args.output_dir - output_name = args.run_name - dirpath = args.data_dir # Path to dataset - scheme = args.scheme - alphas = args.alphas - modname = args.modname - if modname == "swin": - cfgfile = "./tests/deepdisc/test_data/configs/COCO/cascade_mask_rcnn_swin_b_in21k_50ep.py" - # initwfile = "/home/shared/hsc/detectron2/projects/ViTDet/model_final_246a82.pkl" - elif modname == "mvitv2": - cfgfile = "/home/shared/hsc/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_mvitv2_b_in21k_100ep.py" - # initwfile = "/home/shared/hsc/detectron2/projects/ViTDet/model_final_8c3da3.pkl" - - elif modname == "vitdet": - cfgfile = "/home/shared/hsc/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_b_100ep.py" - # initwfile = '/home/g4merz/deblend/detectron2/projects/ViTDet/model_final_435fa9.pkl' - # initwfile = "/home/shared/hsc/detectron2/projects/ViTDet/model_final_61ccd1.pkl" - - datatype = args.dtype - if datatype == 8: - dtype = np.uint8 - elif datatype == 16: - dtype = np.int16 - - trainfile = dirpath + "single_test.json" - testfile = dirpath + "single_test.json" - - classes = ["object"] - numclasses = len(classes) - - # Register the data sets and get the metadata. 
-    astrotrain_metadata = register_data_set("astro_train", trainfile, thing_classes=classes)
-    astroval_metadata = register_data_set("astro_val", testfile, thing_classes=classes)
-
-    cfg = LazyConfig.load(cfgfile)
-
-    # metadata = MetadataCatalog.get(cfg.dataloader.test.dataset.names) # to get labels from ids
-
-    bs = 1
-    cfg.model.proposal_generator.anchor_generator.sizes = [[8], [16], [32], [64], [128]]
-    cfg.dataloader.train.total_batch_size = bs
-    cfg.model.roi_heads.num_classes = numclasses
-    cfg.model.roi_heads.batch_size_per_image = 512
-    cfg.model.backbone.bottom_up.in_chans = 6
-    cfg.model.pixel_mean = [0.05381286, 0.04986344, 0.07526361, 0.10420945, 0.14229655, 0.21245764]
-    cfg.model.pixel_std = [2.9318833, 1.8443471, 2.581817, 3.5950038, 4.5809164, 7.302009]
-    # cfg.model.roi_heads.num_components=5
-    # cfg.model.roi_heads._target_ = RedshiftPDFCasROIHeads
-
-    cfg_loader = get_cfg()
-    cfg_loader.SOLVER.IMS_PER_BATCH = bs
-    cfg_loader.DATASETS.TRAIN = "astro_train" # Register Metadata
-    cfg_loader.DATASETS.TEST = "astro_val"
-    # cfg_loader.DATALOADER.NUM_WORKERS = 0
-    cfg_loader.DATALOADER.PREFETCH_FACTOR = 2
-    cfg_loader.SOLVER.BASE_LR = 0.001
-    cfg_loader.OUTPUT_DIR = output_dir
-    os.makedirs(cfg_loader.OUTPUT_DIR, exist_ok=True)
-
-    cfg_loader.SOLVER.CLIP_GRADIENTS.ENABLED = True
-    # Type of gradient clipping, currently 2 values are supported:
-    # - "value": the absolute values of elements of each gradient are clipped
-    # - "norm": the norm of the gradient for each parameter is clipped, thus
-    # affecting all elements in the parameter
-    cfg_loader.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "norm"
-    # Maximum absolute value used for clipping gradients
-    # Floating point number p for the L-p norm to be used with the "norm"
-    # gradient clipping type; for L-inf, specify .inf
-    cfg_loader.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 5.0
-
-    # iteration milestones: e1 is a flat 20 iterations; e2/e3/efinal correspond to 10/20/35 epochs
-    epoch = int(args.tl / cfg.dataloader.train.total_batch_size)
-    e1 = 20
-    e2 = epoch * 10
-    e3 = epoch * 20
-    efinal = epoch * 35
-
-    val_per = 5
-
-    if train_head:
-        # cfg.train.init_checkpoint = initwfile # replace with the path where you have your model
-        cfg.train.init_checkpoint = None
-
-        # Step 1)
-
-        model = return_lazy_model(cfg)
-
-        cfg.optimizer.params.model = model
-        cfg.optimizer.lr = 0.001
-
-        cfg_loader.SOLVER.STEPS = [] # do not decay learning rate for retraining
-        cfg_loader.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
-        cfg_loader.SOLVER.WARMUP_ITERS = 0
-        cfg_loader.SOLVER.MAX_ITER = e1 # for DefaultTrainer
-
-        # optimizer = instantiate(cfg.optimizer)
-
-        optimizer = return_optimizer(cfg)
-
-        def dc2_key_mapper(dataset_dict):
-            filename = dataset_dict["filename"]
-            return filename
-
-        IR = DC2ImageReader(norm=args.norm)
-        mapper = DictMapper(IR, dc2_key_mapper, train_augs).map_data
-        loader = return_train_loader(cfg_loader, mapper)
-        test_mapper = DictMapper(IR, dc2_key_mapper).map_data
-        test_loader = return_test_loader(cfg_loader, test_mapper)
-
-        saveHook = return_savehook(output_name)
-        lossHook = return_evallosshook(val_per, model, test_loader)
-        schedulerHook = return_schedulerhook(optimizer)
-        hookList = [lossHook, schedulerHook, saveHook]
-
-        trainer = return_lazy_trainer(model, loader, optimizer, cfg, cfg_loader, hookList)
-
-        trainer.set_period(5)
-        trainer.train(0, 20)
-        if comm.is_main_process():
-            np.save(output_dir + output_name + "_losses", trainer.lossList)
-            np.save(output_dir + output_name + "_val_losses", trainer.vallossList)
-        return
-
-
-if __name__ == "__main__":
-    args = 
make_training_arg_parser().parse_args() - print("Command Line Args:", args) - - print("Training head layers") - train_head = True - t0 = time.time() - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=( - train_head, - args, - ), - ) - - torch.cuda.empty_cache() - gc.collect() - - print(f"Took {time.time()-t0} seconds") diff --git a/test_run_transformers_DC2_redshift.py b/test_run_transformers_DC2_redshift.py deleted file mode 100644 index 156cfdb..0000000 --- a/test_run_transformers_DC2_redshift.py +++ /dev/null @@ -1,201 +0,0 @@ -# Training script for LazyConfig models -try: - # ignore ShapelyDeprecationWarning from fvcore - import warnings - - from shapely.errors import ShapelyDeprecationWarning - - warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) - -except: - pass -warnings.filterwarnings("ignore", category=RuntimeWarning) -warnings.filterwarnings("ignore", category=UserWarning) - -# Some basic setup: -# Setup detectron2 logger -from detectron2.utils.logger import setup_logger - -setup_logger() -import gc -import os -import time - -import detectron2.utils.comm as comm - -# import some common libraries -import numpy as np -import torch - -# import some common detectron2 utilities -from detectron2.config import LazyConfig, get_cfg -from detectron2.engine import launch - -from deepdisc.data_format.augment_image import train_augs -from deepdisc.data_format.image_readers import DC2ImageReader -from deepdisc.data_format.register_data import register_data_set -from deepdisc.model.loaders import RedshiftDictMapper, return_test_loader, return_train_loader -from deepdisc.model.models import RedshiftPDFCasROIHeads, return_lazy_model -from deepdisc.training.trainers import ( - return_evallosshook, - return_lazy_trainer, - return_optimizer, - return_savehook, - return_schedulerhook, -) -from deepdisc.utils.parse_arguments import make_training_arg_parser - - -def main(train_head, args): - # Hack if you get SSL certificate error - import ssl - - ssl._create_default_https_context = ssl._create_unverified_context - - output_dir = args.output_dir - output_name = args.run_name - dirpath = args.data_dir # Path to dataset - scheme = args.scheme - alphas = args.alphas - modname = args.modname - if modname == "swin": - cfgfile = "./tests/deepdisc/test_data/configs/COCO/cascade_mask_rcnn_swin_b_in21k_50ep.py" - # initwfile = "/home/shared/hsc/detectron2/projects/ViTDet/model_final_246a82.pkl" - elif modname == "mvitv2": - cfgfile = "/home/shared/hsc/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_mvitv2_b_in21k_100ep.py" - # initwfile = "/home/shared/hsc/detectron2/projects/ViTDet/model_final_8c3da3.pkl" - - elif modname == "vitdet": - cfgfile = "/home/shared/hsc/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_b_100ep.py" - # initwfile = '/home/g4merz/deblend/detectron2/projects/ViTDet/model_final_435fa9.pkl' - # initwfile = "/home/shared/hsc/detectron2/projects/ViTDet/model_final_61ccd1.pkl" - - datatype = args.dtype - if datatype == 8: - dtype = np.uint8 - elif datatype == 16: - dtype = np.int16 - - trainfile = dirpath + "single_test.json" - testfile = dirpath + "single_test.json" - - classes = ["object"] - numclasses = len(classes) - - # Register the data sets and get the metadata. 
-    astrotrain_metadata = register_data_set("astro_train", trainfile, thing_classes=classes)
-    astroval_metadata = register_data_set("astro_val", testfile, thing_classes=classes)
-
-    cfg = LazyConfig.load(cfgfile)
-
-    # metadata = MetadataCatalog.get(cfg.dataloader.test.dataset.names) # to get labels from ids
-
-    bs = 2
-    cfg.model.proposal_generator.anchor_generator.sizes = [[8], [16], [32], [64], [128]]
-    cfg.dataloader.train.total_batch_size = bs
-    cfg.model.roi_heads.num_classes = numclasses
-    cfg.model.roi_heads.batch_size_per_image = 512
-    cfg.model.backbone.bottom_up.in_chans = 6
-    cfg.model.pixel_mean = [0.05381286, 0.04986344, 0.07526361, 0.10420945, 0.14229655, 0.21245764]
-    cfg.model.pixel_std = [2.9318833, 1.8443471, 2.581817, 3.5950038, 4.5809164, 7.302009]
-    cfg.model.roi_heads.num_components = 5
-    cfg.model.roi_heads._target_ = RedshiftPDFCasROIHeads
-
-    cfg_loader = get_cfg()
-    cfg_loader.SOLVER.IMS_PER_BATCH = bs
-    cfg_loader.DATASETS.TRAIN = "astro_train" # Register Metadata
-    cfg_loader.DATASETS.TEST = "astro_val"
-    # cfg_loader.DATALOADER.NUM_WORKERS = 0
-    cfg_loader.DATALOADER.PREFETCH_FACTOR = 2
-    cfg_loader.SOLVER.BASE_LR = 0.001
-    cfg_loader.OUTPUT_DIR = output_dir
-    os.makedirs(cfg_loader.OUTPUT_DIR, exist_ok=True)
-
-    cfg_loader.SOLVER.CLIP_GRADIENTS.ENABLED = True
-    # Type of gradient clipping, currently 2 values are supported:
-    # - "value": the absolute values of elements of each gradient are clipped
-    # - "norm": the norm of the gradient for each parameter is clipped, thus
-    # affecting all elements in the parameter
-    cfg_loader.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "norm"
-    # Maximum absolute value used for clipping gradients
-    # Floating point number p for the L-p norm to be used with the "norm"
-    # gradient clipping type; for L-inf, specify .inf
-    cfg_loader.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 5.0
-
-    # iteration milestones: e1 is a flat 20 iterations; e2/e3/efinal correspond to 10/20/35 epochs
-    epoch = int(args.tl / cfg.dataloader.train.total_batch_size)
-    e1 = 20
-    e2 = epoch * 10
-    e3 = epoch * 20
-    efinal = epoch * 35
-
-    val_per = 5
-
-    if train_head:
-        # cfg.train.init_checkpoint = initwfile # replace with the path where you have your model
-        cfg.train.init_checkpoint = None
-
-        # Step 1)
-
-        model = return_lazy_model(cfg)
-
-        cfg.optimizer.params.model = model
-        cfg.optimizer.lr = 0.001
-
-        cfg_loader.SOLVER.STEPS = [] # do not decay learning rate for retraining
-        cfg_loader.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
-        cfg_loader.SOLVER.WARMUP_ITERS = 0
-        cfg_loader.SOLVER.MAX_ITER = e1 # for DefaultTrainer
-
-        # optimizer = instantiate(cfg.optimizer)
-
-        optimizer = return_optimizer(cfg)
-
-        def dc2_key_mapper(dataset_dict):
-            filename = dataset_dict["filename"]
-            return filename
-
-        IR = DC2ImageReader()
-        mapper = RedshiftDictMapper(IR, dc2_key_mapper, train_augs).map_data
-        loader = return_train_loader(cfg_loader, mapper)
-        test_mapper = RedshiftDictMapper(IR, dc2_key_mapper).map_data
-        test_loader = return_test_loader(cfg_loader, test_mapper)
-
-        saveHook = return_savehook(output_name)
-        lossHook = return_evallosshook(val_per, model, test_loader)
-        schedulerHook = return_schedulerhook(optimizer)
-        # hookList = [lossHook, schedulerHook, saveHook]
-        hookList = [schedulerHook, saveHook]
-        trainer = return_lazy_trainer(model, loader, optimizer, cfg, cfg_loader, hookList)
-
-        trainer.set_period(5)
-        trainer.train(0, 20)
-        if comm.is_main_process():
-            np.save(output_dir + output_name + "_losses", trainer.lossList)
-            # np.save(output_dir + output_name + "_val_losses", trainer.vallossList)
-        return
-
-
-if 
__name__ == "__main__": - args = make_training_arg_parser().parse_args() - print("Command Line Args:", args) - - print("Training head layers") - train_head = True - t0 = time.time() - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=( - train_head, - args, - ), - ) - - torch.cuda.empty_cache() - gc.collect() - - print(f"Took {time.time()-t0} seconds")
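All three deleted training scripts share the same epoch-to-iteration bookkeeping, which is easy to misread, so here is the arithmetic as a worked example (tl and bs are illustrative assumptions; args.tl plays the role of the training-set size, as in the scripts above):

    # Illustrative values only; tl and bs are assumed for the arithmetic.
    tl, bs = 1000, 2      # 1000 training images, total_batch_size = 2
    epoch = int(tl / bs)  # 500 iterations per epoch on a single GPU
    e1 = 20               # flat 20-iteration head-training stage
    e2, e3, efinal = epoch * 10, epoch * 20, epoch * 35  # 5000 / 10000 / 17500 iterations

Only e1 is consumed in these test scripts (cfg_loader.SOLVER.MAX_ITER = e1, and trainer.train(0, 20)); e2, e3, and efinal are computed but left unused.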