# config/hyperparameter_search.conf.yaml

# Name of the experiment.
experiment_name: 'searches'
# Experiment mode. Exactly one of:
#     - fixed_configurations     Every model is run with the configuration given in the "models" section.
#                                Example: `configs/fixed_configs.yaml`.
#     - hyperparameter_search    Every model is run with num_different_configs configurations sampled from
#                                the search space defined in its "searchspaces" file.
#                                Example: `configs/hyperparameter_search.yaml` (this file uses this mode).
# NOTE(review): the example paths above use `configs/` and `.yaml`, while every other path in this file
# uses `config/` and `.conf.yaml` -- confirm that the references are still current.
experiment_mode: 'hyperparameter_search'
# Random seed (for reproducibility).
seed: 2018
# Number of train/validation/test splits.
num_different_splits: 2
# Number of random weight initializations.
num_inits: 2
# Number of configurations sampled for each model from its search space
# (see the hyperparameter_search experiment mode).
num_different_configs: 2
# The total number of experiments that will be run is
#     num_different_configs x num_models x num_datasets x num_different_splits x num_inits.
# For example, with the 9 models and 8 datasets listed in this file and the values above:
#     2 x 9 x 8 x 2 x 2 = 576 experiments.

# File containing the default hyperparameter configuration used for training.
# This is a relative path; you might need to replace it with an absolute path.
default_config: 'config/train.conf.yaml'

# Models to train, one YAML configuration file per model.
# Settings in a model configuration file may override parameters from the default training configuration.
# See config/optimized/gcn.conf.yaml for an example.
# Note: LabelProp_vanilla is not listed here because it has no tunable hyperparameters, so a
# hyperparameter search on it would be superfluous.
models:
    - 'config/optimized/gcn.conf.yaml'
    - 'config/optimized/gat.conf.yaml'
    - 'config/optimized/monet.conf.yaml'
    - 'config/optimized/graphsage_mean.conf.yaml'
    - 'config/optimized/graphsage_maxpool.conf.yaml'
    - 'config/optimized/graphsage_meanpool.conf.yaml'
    - 'config/optimized/mlp.conf.yaml'
    - 'config/optimized/logregr.conf.yaml'
    - 'config/optimized/labelprop_smoothed.conf.yaml'

# Mapping from model name to the search space configuration file for that model.
searchspaces:
    GCN: 'config/searchspace/gcn.searchspace.yaml'
    GAT: 'config/searchspace/gat.searchspace.yaml'
    MoNet: 'config/searchspace/monet.searchspace.yaml'
    GraphSAGE_mean: 'config/searchspace/graphsage_mean.searchspace.yaml'
    GraphSAGE_maxpool: 'config/searchspace/graphsage_maxpool.searchspace.yaml'
    GraphSAGE_meanpool: 'config/searchspace/graphsage_meanpool.searchspace.yaml'
    MLP: 'config/searchspace/mlp.searchspace.yaml'
    LogRegr: 'config/searchspace/logregr.searchspace.yaml'
    LabelProp_smoothed: 'config/searchspace/labelprop_smoothed.searchspace.yaml'

# Datasets on which the searches are run, one file path per dataset.
datasets:
    - 'data/npz/cora.npz'
    - 'data/npz/citeseer.npz'
    - 'data/npz/pubmed.npz'
    - 'data/npz/cora_full.npz'
    - 'data/npz/ms_academic_cs.npz'
    - 'data/npz/ms_academic_phy.npz'
    - 'data/npz/amazon_electronics_computers.npz'
    - 'data/npz/amazon_electronics_photo.npz'

# Format in which the datasets above are stored. Supported values:
#     - npz                 Data stored in compressed numpy binary format. This is the preferred way to
#                           store and load data. See gnnbench/data/io.py for details on how to create and
#                           read files in this format.
#     - planetoid           Data and splits in Planetoid format.
#                           By default, only the Cora, CiteSeer and PubMed datasets are provided in the
#                           Planetoid format.
#     - planetoid_random    Data in Planetoid format, but the predefined train/validation/test split
#                           is IGNORED.
dataset_format: 'npz'

# Performance metrics to collect, each given as the name of a metric function (as a string).
# "accuracy" must always be present because some early stopping criteria depend on it.
# See gnnbench/metrics.py for the available metrics and for how to define new ones.
metrics:
    - 'accuracy'  # required: needed by early stopping
    - 'f1'

# MongoDB server host.
db_host: 'localhost'
# MongoDB server port.
db_port: 27017
# MongoDB database in which the results are stored. It is created automatically if it does not exist.
# Note: training jobs generated by create_jobs.py are always stored in the "pending" database,
# regardless of this setting.
target_db_name: 'gnnbench-search-results'