diff --git a/captum/attr/_core/llm_attr.py b/captum/attr/_core/llm_attr.py
index ff51e079e..8f66b0748 100644
--- a/captum/attr/_core/llm_attr.py
+++ b/captum/attr/_core/llm_attr.py
@@ -508,6 +508,7 @@ def attribute(
                 skip_tokens = self.tokenizer.convert_tokens_to_ids(skip_tokens)
         else:
             skip_tokens = []
+        skip_tokens = cast(List[int], skip_tokens)
 
         if isinstance(target, str):
             encoded = self.tokenizer.encode(target)
@@ -700,6 +701,7 @@ def attribute(
                 skip_tokens = self.tokenizer.convert_tokens_to_ids(skip_tokens)
         else:
             skip_tokens = []
+        skip_tokens = cast(List[int], skip_tokens)
 
         if isinstance(target, str):
             encoded = self.tokenizer.encode(target)
diff --git a/captum/attr/_core/occlusion.py b/captum/attr/_core/occlusion.py
index 298a95f8d..33c153110 100644
--- a/captum/attr/_core/occlusion.py
+++ b/captum/attr/_core/occlusion.py
@@ -384,7 +384,7 @@ def _occlusion_mask(
     def _get_feature_range_and_mask(
         self, input: Tensor, input_mask: Optional[Tensor], **kwargs: Any
     ) -> Tuple[int, int, Union[None, Tensor, Tuple[Tensor, ...]]]:
-        feature_max = np.prod(kwargs["shift_counts"])
+        feature_max = int(np.prod(kwargs["shift_counts"]))
         return 0, feature_max, None
 
     def _get_feature_counts(
diff --git a/captum/attr/_utils/attribution.py b/captum/attr/_utils/attribution.py
index cf02fd3c0..9cb9b297b 100644
--- a/captum/attr/_utils/attribution.py
+++ b/captum/attr/_utils/attribution.py
@@ -367,7 +367,9 @@ def multiplies_by_inputs(self) -> bool:
         return True
 
 
-class InternalAttribution(Attribution, Generic[ModuleOrModuleList]):
+# mypy false positive "Free type variable expected in Generic[...]" but
+# ModuleOrModuleList is a TypeVar
+class InternalAttribution(Attribution, Generic[ModuleOrModuleList]):  # type: ignore
     r"""
     Shared base class for LayerAttrubution and NeuronAttribution,
     attribution types that require a model and a particular layer.
diff --git a/captum/influence/_core/tracincp_fast_rand_proj.py b/captum/influence/_core/tracincp_fast_rand_proj.py
index 26f5212fa..8c679266e 100644
--- a/captum/influence/_core/tracincp_fast_rand_proj.py
+++ b/captum/influence/_core/tracincp_fast_rand_proj.py
@@ -189,7 +189,7 @@ def __init__(
         self.vectorize = vectorize
 
         # TODO: restore prior state
-        self.final_fc_layer = final_fc_layer  # type: ignore
+        self.final_fc_layer = cast(Module, final_fc_layer)
         for param in self.final_fc_layer.parameters():
             param.requires_grad = True
 
@@ -212,8 +212,7 @@ def final_fc_layer(self) -> Module:
         return self._final_fc_layer
 
     @final_fc_layer.setter
-    # pyre-fixme[3]: Return type must be annotated.
-    def final_fc_layer(self, layer: Union[Module, str]):
+    def final_fc_layer(self, layer: Union[Module, str]) -> None:
         if isinstance(layer, str):
             try:
                 self._final_fc_layer = _get_module_from_name(self.model, layer)
diff --git a/tests/attr/test_interpretable_input.py b/tests/attr/test_interpretable_input.py
index 05e934014..0550b3562 100644
--- a/tests/attr/test_interpretable_input.py
+++ b/tests/attr/test_interpretable_input.py
@@ -25,7 +25,7 @@ def encode(self, text: str, return_tensors: None = None) -> List[int]: ...
     # pyre-fixme[43]: Incompatible overload. The implementation of
     # `DummyTokenizer.encode` does not accept all possible arguments of overload.
     # pyre-ignore[11]: Annotation `pt` is not defined as a type
-    def encode(self, text: str, return_tensors: Literal["pt"]) -> Tensor: ...
+    def encode(self, text: str, return_tensors: Literal["pt"]) -> Tensor: ...  # type: ignore  # noqa: E501 line too long
 
     def encode(
         self, text: str, return_tensors: Optional[str] = "pt"