diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ba0a3d64..fb37afd6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -21,7 +21,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11"] + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 diff --git a/sae_lens/training/config.py b/sae_lens/training/config.py index b75398a8..4e2a8c73 100644 --- a/sae_lens/training/config.py +++ b/sae_lens/training/config.py @@ -167,10 +167,10 @@ def __post_init__(self): n_tokens_per_buffer = ( self.store_batch_size * self.context_size * self.n_batches_in_buffer ) - print(f"n_tokens_per_buffer (millions): {n_tokens_per_buffer / 10 **6}") + print(f"n_tokens_per_buffer (millions): {n_tokens_per_buffer / 10 ** 6}") n_contexts_per_buffer = self.store_batch_size * self.n_batches_in_buffer print( - f"Lower bound: n_contexts_per_buffer (millions): {n_contexts_per_buffer / 10 **6}" + f"Lower bound: n_contexts_per_buffer (millions): {n_contexts_per_buffer / 10 ** 6}" ) total_training_steps = ( @@ -187,10 +187,10 @@ def __post_init__(self): total_training_steps // self.feature_sampling_window ) print( - f"n_tokens_per_feature_sampling_window (millions): {(self.feature_sampling_window * self.context_size * self.train_batch_size) / 10 **6}" + f"n_tokens_per_feature_sampling_window (millions): {(self.feature_sampling_window * self.context_size * self.train_batch_size) / 10 ** 6}" ) print( - f"n_tokens_per_dead_feature_window (millions): {(self.dead_feature_window * self.context_size * self.train_batch_size) / 10 **6}" + f"n_tokens_per_dead_feature_window (millions): {(self.dead_feature_window * self.context_size * self.train_batch_size) / 10 ** 6}" ) print( f"We will reset the sparsity calculation {n_feature_window_samples} times."