diff --git a/src/imitation/algorithms/adversarial/common.py b/src/imitation/algorithms/adversarial/common.py
index 48129fa67..da578135f 100644
--- a/src/imitation/algorithms/adversarial/common.py
+++ b/src/imitation/algorithms/adversarial/common.py
@@ -176,7 +176,7 @@ def __init__(
                 training. If True, overrides this safety check. WARNING: variable
                 horizon episodes leak information about the reward via termination
                 condition, and can seriously confound evaluation. Read
-                https://imitation.readthedocs.io/en/latest/guide/variable_horizon.html
+                https://imitation.readthedocs.io/en/latest/main-concepts/variable_horizon.html
                 before overriding this.
 
         Raises:
diff --git a/src/imitation/algorithms/base.py b/src/imitation/algorithms/base.py
index fd33c5f40..b1a2b6861 100644
--- a/src/imitation/algorithms/base.py
+++ b/src/imitation/algorithms/base.py
@@ -48,7 +48,7 @@ def __init__(
                 training. If True, overrides this safety check. WARNING: variable
                 horizon episodes leak information about the reward via termination
                 condition, and can seriously confound evaluation. Read
-                https://imitation.readthedocs.io/en/latest/getting-started/variable-horizon.html
+                https://imitation.readthedocs.io/en/latest/main-concepts/variable_horizon.html
                 before overriding this.
         """
         self._logger = custom_logger or imit_logger.configure()
@@ -61,8 +61,8 @@ def __init__(
                 "Additionally, even unbiased algorithms can exploit "
                 "the information leak from the termination condition, "
                 "producing spuriously high performance. See "
-                "https://imitation.readthedocs.io/en/latest/getting-started/"
-                "variable-horizon.html for more information.",
+                "https://imitation.readthedocs.io/en/latest/main-concepts/"
+                "variable_horizon.html for more information.",
             )
         self._horizon = None
 
@@ -100,8 +100,8 @@ def _check_fixed_horizon(self, horizons: Iterable[int]) -> None:
                 f"Episodes of different length detected: {horizons}. "
                 "Variable horizon environments are discouraged -- "
                 "termination conditions leak information about reward. See "
-                "https://imitation.readthedocs.io/en/latest/getting-started/"
-                "variable-horizon.html for more information. "
+                "https://imitation.readthedocs.io/en/latest/main-concepts/"
+                "variable_horizon.html for more information. "
                 "If you are SURE you want to run imitation on a "
                 "variable horizon task, then please pass in the flag: "
                 "`allow_variable_horizon=True`.",
@@ -152,7 +152,7 @@ def __init__(
                 training. If True, overrides this safety check. WARNING: variable
                 horizon episodes leak information about the reward via termination
                 condition, and can seriously confound evaluation. Read
-                https://imitation.readthedocs.io/en/latest/getting-started/variable-horizon.html
+                https://imitation.readthedocs.io/en/latest/main-concepts/variable_horizon.html
                 before overriding this.
         """
         super().__init__(
diff --git a/src/imitation/algorithms/density.py b/src/imitation/algorithms/density.py
index fcc5e5ac9..c59f80c8a 100644
--- a/src/imitation/algorithms/density.py
+++ b/src/imitation/algorithms/density.py
@@ -104,7 +104,7 @@ def __init__(
                 training. If True, overrides this safety check. WARNING: variable
                 horizon episodes leak information about the reward via termination
                 condition, and can seriously confound evaluation. Read
-                https://imitation.readthedocs.io/en/latest/guide/variable_horizon.html
+                https://imitation.readthedocs.io/en/latest/main-concepts/variable_horizon.html
                 before overriding this.
         """
         self.is_stationary = is_stationary
diff --git a/src/imitation/algorithms/preference_comparisons.py b/src/imitation/algorithms/preference_comparisons.py
index 413cd979a..96cc726de 100644
--- a/src/imitation/algorithms/preference_comparisons.py
+++ b/src/imitation/algorithms/preference_comparisons.py
@@ -1550,7 +1550,7 @@ def __init__(
                 training. If True, overrides this safety check. WARNING: variable
                 horizon episodes leak information about the reward via termination
                 condition, and can seriously confound evaluation. Read
-                https://imitation.readthedocs.io/en/latest/guide/variable_horizon.html
+                https://imitation.readthedocs.io/en/latest/main-concepts/variable_horizon.html
                 before overriding this.
             rng: random number generator to use for initializing subcomponents
                 such as fragmenter.
diff --git a/src/imitation/scripts/train_preference_comparisons.py b/src/imitation/scripts/train_preference_comparisons.py
index 79ee4c136..e9703f78b 100644
--- a/src/imitation/scripts/train_preference_comparisons.py
+++ b/src/imitation/scripts/train_preference_comparisons.py
@@ -133,7 +133,7 @@ def train_preference_comparisons(
             training. If True, overrides this safety check. WARNING: variable
             horizon episodes leak information about the reward via termination
             condition, and can seriously confound evaluation. Read
-            https://imitation.readthedocs.io/en/latest/guide/variable_horizon.html
+            https://imitation.readthedocs.io/en/latest/main-concepts/variable_horizon.html
             before overriding this.
         checkpoint_interval: Save the reward model and policy models (if
             trajectory_generator contains a policy) every `checkpoint_interval`