diff --git a/pyproject.toml b/pyproject.toml
index c8eb3b18..02c516b7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,6 +31,7 @@ safetensors = "^0.4.2"
 typer = "^0.12.3"
 mamba-lens = { version = "^0.0.4", optional = true }
 pyzmq = "26.0.0"
+automated-interpretability = "^0.0.2"
 
 [tool.poetry.group.dev.dependencies]
 
diff --git a/sae_lens/analysis/neuronpedia_integration.py b/sae_lens/analysis/neuronpedia_integration.py
index fcf98d29..b1ad7412 100644
--- a/sae_lens/analysis/neuronpedia_integration.py
+++ b/sae_lens/analysis/neuronpedia_integration.py
@@ -1,10 +1,50 @@
 import json
+import os
 import urllib.parse
 import webbrowser
-from typing import Optional
+from datetime import datetime
+from typing import Literal, Optional
 
 import requests
 
+NEURONPEDIA_DOMAIN = "https://neuronpedia.org"
+
+# Constants for replacing NaNs and Infs in outputs
+POSITIVE_INF_REPLACEMENT = 9999
+NEGATIVE_INF_REPLACEMENT = -9999
+NAN_REPLACEMENT = 0
+OTHER_INVALID_REPLACEMENT = -99999
+
+
+def NanAndInfReplacer(value: str) -> float:
+    """Map the bare JSON constants NaN/Infinity/-Infinity to finite sentinel values."""
+    replacements = {
+        "-Infinity": NEGATIVE_INF_REPLACEMENT,
+        "Infinity": POSITIVE_INF_REPLACEMENT,
+        "NaN": NAN_REPLACEMENT,
+    }
+    if value in replacements:
+        return float(replacements[value])
+    return float(NAN_REPLACEMENT)
+
+
+def get_neuronpedia_feature(
+    feature: int,
+    layer: int,
+    model: str = "gpt2-small",
+    dataset: str = "res-jb",
+):
+    url = NEURONPEDIA_DOMAIN + "/api/feature/"
+    url = url + f"{model}/{layer}-{dataset}/{feature}"
+
+    result = requests.get(url).json()
+    result["index"] = int(result["index"])
+
+    return result
+
 
 def get_neuronpedia_quick_list(
     features: list[int],
@@ -13,7 +53,7 @@ def get_neuronpedia_quick_list(
     dataset: str = "res-jb",
     name: str = "temporary_list",
 ):
-    url = "https://neuronpedia.org/quick-list/"
+    url = NEURONPEDIA_DOMAIN + "/quick-list/"
     name = urllib.parse.quote(name)
     url = url + "?name=" + name
     list_feature = [
@@ -30,27 +70,26 @@ def get_neuronpedia_quick_list(
     return url
 
 
-def get_neuronpedia_feature(
-    feature: int,
-    layer: int,
-    model: str = "gpt2-small",
-    dataset: str = "res-jb",
-):
-    url = "https://neuronpedia.org/api/feature/"
-    url = url + f"{model}/{layer}-{dataset}/{feature}"
-
-    result = requests.get(url).json()
-    result["index"] = int(result["index"])
+class NeuronpediaActivation(object):
+    def __init__(self, id: str, tokens: list[str], act_values: list[float]):
+        self.id = id
+        self.tokens = tokens
+        self.act_values = act_values
 
-    return result
 
-
-class NeuronpediaListFeature(object):
+class NeuronpediaFeature(object):
     modelId = ""
     layer = 0
     dataset = ""
     index = 0
     description = ""
+    autointerp_explanation = ""
+    autointerp_explanation_score = 0.0
 
     def __init__(
         self,
@@ -59,22 +98,36 @@ def __init__(
         dataset: str,
         feature: int,
         description: str = "",
+        activations: Optional[list[NeuronpediaActivation]] = None,
+        autointerp_explanation: str = "",
+        autointerp_explanation_score: float = 0.0,
     ):
         self.modelId = modelId
         self.layer = layer
         self.dataset = dataset
         self.feature = feature
        self.description = description
+        # avoid a shared mutable default list across instances
+        self.activations = activations if activations is not None else []
+        self.autointerp_explanation = autointerp_explanation
+        self.autointerp_explanation_score = autointerp_explanation_score
+
+    def has_activating_text(self) -> bool:
+        """Return True if any fetched activation record has a positive activation value."""
+        return any(max(activation.act_values) > 0 for activation in self.activations)
 
 
 def make_neuronpedia_list_with_features(
     api_key: str,
     list_name: str,
-    features: list[NeuronpediaListFeature],
+    features: list[NeuronpediaFeature],
     list_description: Optional[str] = None,
     open_browser: bool = True,
 ):
-    url = "https://neuronpedia.org/api/list/new-with-features"
+    url = NEURONPEDIA_DOMAIN + "/api/list/new-with-features"
 
     # make POST json request with body
     body = {
@@ -99,3 +152,275 @@ def make_neuronpedia_list_with_features(
         return result["url"]
     else:
         raise Exception("Error in creating list: " + result["message"])
+
+
+def test_key(api_key: str):
+    """Validate a Neuronpedia API key against the API's test endpoint."""
+    url = NEURONPEDIA_DOMAIN + "/api/test"
+    body = {
+        "apiKey": api_key,
+    }
+    response = requests.post(url, json=body)
+    if response.status_code != 200:
+        raise Exception("Neuronpedia API key is not valid.")
+
+
+async def autointerp_neuronpedia_features(
+    features: list[NeuronpediaFeature],
+    openai_api_key: str,
+    autointerp_retry_attempts: int = 3,
+    autointerp_score_max_concurrent: int = 20,  # ideally this matches num_activations_to_use
+    neuronpedia_api_key: str = "",
+    # TODO: estimate a max budget from the number of features, activation texts, and text lengths; fail if it is too high.
+    # max_budget_approx_usd: float = 5.00,
+    do_score: bool = True,
+    output_dir: str = "neuronpedia_outputs/autointerp",
+    num_activations_to_use: int = 20,
+    upload_to_neuronpedia: bool = True,
+    autointerp_model_name: Literal["gpt-3.5-turbo", "gpt-4-turbo"] = "gpt-3.5-turbo",
+):
+    """
+    This does the following:
+    1. Fetches the features from Neuronpedia, including their activation texts
+    2. Explains the features using autointerp_model_name
+    3. Scores the explanations using autointerp_model_name
+    4. Saves the results in output_dir
+    5. Uploads the results to Neuronpedia
+
+    The openai_api_key is sent only to OpenAI, never to Neuronpedia.
+    """
+    print("\n\n")
+
+    # name the output file by the current timestamp
+    output_file = output_dir + "/" + datetime.now().strftime("%Y%m%d-%H%M%S") + ".jsonl"
+    if not os.path.exists(output_dir):
+        print("Creating output directory " + output_dir)
+        os.makedirs(output_dir, exist_ok=True)
+    print("===== Your results will be saved to: " + output_file + " =====")
+
+    # we import here instead of at the top of the file because the library requires the API key to be set first
+    os.environ["OPENAI_API_KEY"] = openai_api_key
+    from neuron_explainer.activations.activation_records import calculate_max_activation
+    from neuron_explainer.activations.activations import ActivationRecord
+    from neuron_explainer.explanations.calibrated_simulator import (
+        UncalibratedNeuronSimulator,
+    )
+    from neuron_explainer.explanations.explainer import (
+        ContextSize,
+        TokenActivationPairExplainer,
+    )
+    from neuron_explainer.explanations.few_shot_examples import FewShotExampleSet
+    from neuron_explainer.explanations.prompt_builder import PromptFormat
+    from neuron_explainer.explanations.scoring import simulate_and_score
+    from neuron_explainer.explanations.simulator import (
+        LogprobFreeExplanationTokenSimulator,
+    )
+
+    # the Neuronpedia key is only required (and only validated) if we intend to upload
+    if upload_to_neuronpedia:
+        if neuronpedia_api_key == "":
+            raise Exception(
+                "You need to provide a Neuronpedia API key to upload the results to Neuronpedia."
+            )
+        test_key(neuronpedia_api_key)
+
+    # 1. Fetch the features from Neuronpedia, including their activation texts, and check for dead features.
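+    # (For reference: get_neuronpedia_feature requests
+    # NEURONPEDIA_DOMAIN + "/api/feature/{model}/{layer}-{dataset}/{feature}",
+    # so every NeuronpediaFeature passed in must already exist on Neuronpedia.)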
+    print("\n\n=== Step 1) Fetching features from Neuronpedia")
+    for feature in features:
+        feature_data = get_neuronpedia_feature(
+            feature=feature.feature,
+            layer=feature.layer,
+            model=feature.modelId,
+            dataset=feature.dataset,
+        )
+
+        if "modelId" not in feature_data:
+            raise Exception(
+                f"Feature {feature.feature} in layer {feature.layer} of model {feature.modelId} and dataset {feature.dataset} does not exist."
+            )
+
+        if "activations" not in feature_data or len(feature_data["activations"]) == 0:
+            raise Exception(
+                f"Feature {feature.feature} in layer {feature.layer} of model {feature.modelId} and dataset {feature.dataset} does not have activations."
+            )
+
+        # keep at most num_activations_to_use activation records per feature
+        activations = feature_data["activations"]
+        feature.activations = [
+            NeuronpediaActivation(
+                id=activation["id"],
+                tokens=activation["tokens"],
+                act_values=activation["values"],
+            )
+            for activation in activations[:num_activations_to_use]
+        ]
+
+        if not feature.has_activating_text():
+            raise Exception(
+                f"Feature {feature.modelId}@{feature.layer}-{feature.dataset}:{feature.feature} appears dead - it does not have activating text."
+            )
+
+    # TODO: estimate a max budget from the number of features, activation texts, and text lengths; fail if it is too high.
+
+    # 2. Explain the features using autointerp_model_name
+    for iteration_num, feature in enumerate(features):
+        # record the start time so we can report time spent per feature
+        start_time = datetime.now()
+
+        print(
+            f"\n========== Feature {feature.modelId}@{feature.layer}-{feature.dataset}:{feature.feature} ({iteration_num + 1} of {len(features)} Features) =========="
+        )
+        print(
+            f"\n=== Step 2) Explaining feature {feature.modelId}@{feature.layer}-{feature.dataset}:{feature.feature}"
+        )
+        activationRecords = [
+            ActivationRecord(
+                tokens=activation.tokens, activations=activation.act_values
+            )
+            for activation in feature.activations
+        ]
+
+        explainer = TokenActivationPairExplainer(
+            model_name=autointerp_model_name,
+            prompt_format=PromptFormat.HARMONY_V4,
+            context_size=ContextSize.SIXTEEN_K,
+            max_concurrent=1,
+        )
+
+        explanations = []
+        for _ in range(autointerp_retry_attempts):
+            try:
+                explanations = await explainer.generate_explanations(
+                    all_activation_records=activationRecords,
+                    max_activation=calculate_max_activation(activationRecords),
+                    num_samples=1,
+                )
+            except Exception as e:
+                print(f"ERROR, RETRYING: {e}")
+            else:
+                break
+        else:
+            print(
+                f"ERROR: Failed to explain feature {feature.modelId}@{feature.layer}-{feature.dataset}:{feature.feature}. Skipping it."
+            )
+            continue
+
+        assert len(explanations) == 1
+        explanation = explanations[0]
+        # GPT ends its explanations with a period. Remove it.
+        if explanation.endswith("."):
+            explanation = explanation[:-1]
+        print(f"===== {autointerp_model_name}'s explanation: {explanation}")
+        feature.autointerp_explanation = explanation
+
+        # 3. Score the explanation using autointerp_model_name
+        scored_simulation = None
+        if do_score:
+            print(
+                f"\n=== Step 3) Scoring feature {feature.modelId}@{feature.layer}-{feature.dataset}:{feature.feature}"
+            )
+            print("=== This can take up to 30 seconds.")
+
+            # GPT struggles with non-ascii tokens, so we turn them into string representations.
+            # Work on a temporary copy of the activation records so the originals are preserved.
+            tempActivationRecords: list[ActivationRecord] = []
+            for activationRecord in activationRecords:
+                replacedActTokens: list[str] = [
+                    token.replace("<|endoftext|>", "<|not_endoftext|>")
+                    .replace(" 55", "_55")
+                    .encode("ascii", errors="backslashreplace")
+                    .decode("ascii")
+                    for token in activationRecord.tokens
+                ]
+                tempActivationRecords.append(
+                    ActivationRecord(
+                        tokens=replacedActTokens,
+                        activations=activationRecord.activations,
+                    )
+                )
+
+            # Simulate and score the explanation.
+            score = None
+            for _ in range(autointerp_retry_attempts):
+                try:
+                    simulator = UncalibratedNeuronSimulator(
+                        LogprobFreeExplanationTokenSimulator(
+                            autointerp_model_name,
+                            explanation,
+                            json_mode=True,
+                            max_concurrent=autointerp_score_max_concurrent,
+                            few_shot_example_set=FewShotExampleSet.JL_FINE_TUNED,
+                            prompt_format=PromptFormat.HARMONY_V4,
+                        )
+                    )
+                    scored_simulation = await simulate_and_score(
+                        simulator, tempActivationRecords
+                    )
+                    score = scored_simulation.get_preferred_score()
+                except Exception as e:
+                    print(f"ERROR, RETRYING: {e}")
+                else:
+                    break
+
+            if (
+                score is None
+                or scored_simulation is None
+                or len(scored_simulation.scored_sequence_simulations)
+                != num_activations_to_use
+            ):
+                print(
+                    f"ERROR: Failed to score feature {feature.modelId}@{feature.layer}-{feature.dataset}:{feature.feature}. Skipping it."
+                )
+                continue
+            feature.autointerp_explanation_score = score
+            print(f"===== {autointerp_model_name}'s score: {(score * 100):.0f}")
+
+        # replace NaNs and Infs in the output so we get valid JSON
+        output_data = json.dumps(
+            {
+                "apiKey": neuronpedia_api_key,
+                "feature": {
+                    "modelId": feature.modelId,
+                    "layer": f"{feature.layer}-{feature.dataset}",
+                    "index": feature.feature,
+                    "activations": feature.activations,
+                    "explanation": feature.autointerp_explanation,
+                    "explanationScore": feature.autointerp_explanation_score,
+                    "autointerpModel": autointerp_model_name,
+                    "simulatedActivations": (
+                        scored_simulation.scored_sequence_simulations
+                        if scored_simulation is not None
+                        else []
+                    ),
+                },
+            },
+            default=vars,
+        )
+        output_data_json = json.loads(
+            output_data,
+            parse_constant=NanAndInfReplacer,
+        )
+        output_data_str = json.dumps(output_data_json)
+
+        # 4. Save the results in output_file, appending this feature as one JSON line
+        print(f"\n=== Step 4) Saving feature to {output_file}")
+        with open(output_file, "a") as f:
+            f.write(output_data_str)
+            f.write("\n")
+
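+        # (The upload below reuses output_data_json - the same NaN/Inf-sanitized
+        # payload just written to output_file - as the POST body, so the file on
+        # disk and the uploaded explanation stay in sync.)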
+        # 5. Upload the results to Neuronpedia
+        if upload_to_neuronpedia:
+            print(
+                f"\n=== Step 5) Uploading feature to Neuronpedia: {feature.modelId}@{feature.layer}-{feature.dataset}:{feature.feature}"
+            )
+            url = NEURONPEDIA_DOMAIN + "/api/upload-explanation"
+            body = output_data_json
+            response = requests.post(url, json=body)
+            if response.status_code != 200:
+                print(
+                    f"ERROR: Couldn't upload explanation to Neuronpedia: {response.text}"
+                )
+
+        # report the time spent on this feature
+        end_time = datetime.now()
+        print("\n========== Time Spent for Feature: {}\n".format(end_time - start_time))
+
+    print("\n\n========== Generation and Upload Complete ==========\n\n")
diff --git a/tests/unit/analysis/test_neuronpedia_integration.py b/tests/unit/analysis/test_neuronpedia_integration.py
index 0ad743a6..f9420446 100644
--- a/tests/unit/analysis/test_neuronpedia_integration.py
+++ b/tests/unit/analysis/test_neuronpedia_integration.py
@@ -1,7 +1,8 @@
 import pytest
 
 from sae_lens.analysis.neuronpedia_integration import (
-    NeuronpediaListFeature,
+    NeuronpediaFeature,
+    autointerp_neuronpedia_features,
     get_neuronpedia_feature,
     make_neuronpedia_list_with_features,
 )
@@ -26,14 +27,14 @@ def test_make_neuronpedia_list_with_features():
         list_name="test_api",
         list_description="List descriptions are optional",
         features=[
-            NeuronpediaListFeature(
+            NeuronpediaFeature(
                 modelId="gpt2-small",
                 layer=0,
                 dataset="att-kk",
                 feature=11,
                 description="List feature descriptions are optional as well.",
             ),
-            NeuronpediaListFeature(
+            NeuronpediaFeature(
                 modelId="gpt2-small",
                 layer=6,
                 dataset="res_scefr-ajt",
@@ -42,3 +43,31 @@ def test_make_neuronpedia_list_with_features():
             ),
         ],
     )
+
+
+@pytest.mark.skip(
+    reason="Need a way to test with an API key - maybe test against a dev environment?"
+)
+@pytest.mark.anyio
+async def test_neuronpedia_autointerp():
+    features = [
+        NeuronpediaFeature(
+            modelId="gpt2-small",
+            layer=0,
+            dataset="att-kk",
+            feature=11,
+        ),
+        NeuronpediaFeature(
+            modelId="gpt2-small",
+            layer=0,
+            dataset="att-kk",
+            feature=12,
+        ),
+    ]
+    await autointerp_neuronpedia_features(
+        features=features,
+        openai_api_key="your-oai-key",
+        neuronpedia_api_key="your-np-key",
+        autointerp_model_name="gpt-3.5-turbo",
+        num_activations_to_use=20,
+    )
diff --git a/tutorials/neuronpedia/neuronpedia.py b/tutorials/neuronpedia/neuronpedia.py
index 86d5dd1c..cbd86eae 100755
--- a/tutorials/neuronpedia/neuronpedia.py
+++ b/tutorials/neuronpedia/neuronpedia.py
@@ -5,9 +5,7 @@
 import math
 import os
 import subprocess
-from decimal import Decimal
 from pathlib import Path
-from typing import Any
 
 import requests
 import torch
@@ -17,6 +15,7 @@
 from rich.panel import Panel
 from typing_extensions import Annotated
 
+from sae_lens.analysis.neuronpedia_integration import NanAndInfReplacer
 from sae_lens.toolkit.pretrained_saes import load_sparsity
 from sae_lens.training.sparse_autoencoder import SparseAutoencoder
 
@@ -376,11 +375,7 @@ def upload(
     for file_path in files_to_upload:
         print("===== Uploading file: " + os.path.basename(file_path))
         f = open(file_path, "r")
-        data = json.load(f)
-
-        # Replace NaNs
-        data_fixed = json.dumps(data, cls=NanConverter)
-        data = json.loads(data_fixed)
+        data = json.load(f, parse_constant=NanAndInfReplacer)
 
         url = host + "/api/local/upload-features"
         requests.post(
@@ -444,23 +439,5 @@ def upload_dead_stubs(
     )
 
 
-# Helper utilities that help fix weird NaNs in the feature outputs
-
-
-def nanToNeg999(obj: Any) -> Any:
-    if isinstance(obj, dict):
-        return {k: nanToNeg999(v) for k, v in obj.items()}
-    elif isinstance(obj, list):
-        return [nanToNeg999(v) for v in obj]
-    elif (isinstance(obj, float) or isinstance(obj, Decimal)) and math.isnan(obj):
-        return -999
-    return obj
-
-
-class NanConverter(json.JSONEncoder):
-    def encode(self, o: Any, *args: Any, **kwargs: Any):
-        return super().encode(nanToNeg999(o), *args, **kwargs)
-
-
 if __name__ == "__main__":
     app()
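
Note on the NaN handling above: json.load/json.loads invoke parse_constant only for the
bare JSON tokens NaN, Infinity, and -Infinity, which is why the recursive NanConverter
walk could be deleted. A minimal standalone sketch of the new parse-time behavior
(assuming only the NanAndInfReplacer defined in this diff):

    import json

    from sae_lens.analysis.neuronpedia_integration import NanAndInfReplacer

    # The hook fires once per constant at decode time and substitutes the finite
    # sentinels defined in neuronpedia_integration (0, 9999, -9999).
    raw = '{"score": NaN, "max_act": Infinity, "min_act": -Infinity}'
    print(json.loads(raw, parse_constant=NanAndInfReplacer))
    # -> {'score': 0.0, 'max_act': 9999.0, 'min_act': -9999.0}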