Split the get_random_model_and_data() method [1/n] (pytorch#1252)
Summary:

As titled. The get_random_model_and_data() method is used to construct test models and data for the influence tests, and flake8 reports it as too complex (https://www.flake8rules.com/rules/C901.html). This series of diffs will split the method and abstract out the common parts.

This diff isolates the model-construction logic for the different GPU usage settings. It also eliminates the mixed use of bool and str for the GPU setting.

Differential Revision: D55165054
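
For context, here is a minimal sketch of what the new test helpers imported in this diff (GPU_SETTING_LIST and is_gpu from tests/influence/_utils/common.py) might look like, inferred only from how the diff uses them; the names and values below are assumptions, and the actual definitions live in that module.

```python
# Hedged sketch (not the actual implementation): GPU-setting helpers assumed
# to be defined in tests/influence/_utils/common, based on how this diff
# imports and uses them.
from typing import List, Optional

import torch

# A single string now encodes the GPU usage setting instead of a bool/str mix:
# "" (CPU), "cuda" (single GPU), "cuda_data_parallel" (nn.DataParallel).
GPU_SETTING_LIST: List[str] = (
    ["", "cuda", "cuda_data_parallel"]
    if torch.cuda.is_available() and torch.cuda.device_count() != 0
    else [""]
)


def is_gpu(gpu_setting: Optional[str]) -> bool:
    # True iff the setting places the model/data on GPU; call sites in this
    # diff recover the old boolean via `use_gpu = is_gpu(gpu_setting)`.
    return gpu_setting in ("cuda", "cuda_data_parallel", "cuda_dataparallel")
```

With helpers like these, tests parameterize over a single gpu_setting string and derive the boolean only where dataset construction needs it, as the `use_gpu = is_gpu(gpu_setting)` lines in the hunks below show.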
cicichen01 authored and facebook-github-bot committed Mar 22, 2024
1 parent f94d3ee commit bac9fdc
Showing 6 changed files with 107 additions and 86 deletions.
46 changes: 30 additions & 16 deletions tests/influence/_core/test_arnoldi_influence.py
@@ -1,5 +1,5 @@
import tempfile
from typing import Callable, List, Tuple, Union
from typing import Callable, List, Optional, Tuple

import torch

@@ -27,8 +27,9 @@
generate_assymetric_matrix_given_eigenvalues,
generate_symmetric_matrix_given_eigenvalues,
get_random_model_and_data,
GPU_SETTING_LIST,
is_gpu,
UnpackDataset,
USE_GPU_LIST,
)
from torch import Tensor
from torch.utils.data import DataLoader
@@ -229,6 +230,17 @@ def _param_matmul(params: Tuple[Tensor]):
"max",
)

# TODO: for some unknown reason, this test and the test below do not work
# with the `cuda_data_parallel` setting. We need to investigate why.
# Use a local version of the setting list for these two tests for now,
# since we have changed the default setting list to include all options.
# (This is also used in many other tests, which also need to be unified later).
gpu_setting_list = (
["", "cuda"]
if torch.cuda.is_available() and torch.cuda.device_count() != 0
else [""]
)

@parameterized.expand(
[
(
@@ -237,17 +249,17 @@ def _param_matmul(params: Tuple[Tensor]):
delta,
mode,
unpack_inputs,
use_gpu,
gpu_setting,
)
for use_gpu in USE_GPU_LIST
for gpu_setting in gpu_setting_list
for (influence_constructor_1, influence_constructor_2, delta) in [
# compare implementations, when considering only 1 layer
(
DataInfluenceConstructor(
NaiveInfluenceFunction,
layers=(
["module.linear1"]
if use_gpu == "cuda_dataparallel"
if gpu_setting == "cuda_dataparallel"
else ["linear1"]
),
projection_dim=5,
@@ -258,7 +270,7 @@ def _param_matmul(params: Tuple[Tensor]):
ArnoldiInfluenceFunction,
layers=(
["module.linear1"]
if use_gpu == "cuda_dataparallel"
if gpu_setting == "cuda_dataparallel"
else ["linear1"]
),
arnoldi_dim=50,
@@ -314,7 +326,7 @@ def test_compare_implementations_trained_NN_model_and_data(
delta: float,
mode: str,
unpack_inputs: bool,
use_gpu: Union[bool, str],
gpu_setting: Optional[str],
) -> None:
"""
this compares 2 influence implementations on a trained 2-layer NN model.
@@ -329,14 +341,15 @@ def test_compare_implementations_trained_NN_model_and_data(
delta,
mode,
unpack_inputs,
use_gpu,
gpu_setting,
)

# this compares `ArnoldiInfluenceFunction` and `NaiveInfluenceFunction` on randomly
# generated data. because these implementations are numerically equivalent, we
# can also compare the intermediate quantities. we do not compare with
# `NaiveInfluence` because on randomly generated data, it is not comparable,
# conceptually, with the other implementations, due to numerical issues.

@parameterized.expand(
[
(
@@ -345,16 +358,16 @@ def test_compare_implementations_trained_NN_model_and_data(
delta,
mode,
unpack_inputs,
use_gpu,
gpu_setting,
)
for use_gpu in USE_GPU_LIST
for gpu_setting in gpu_setting_list
for (influence_constructor_1, influence_constructor_2, delta) in [
(
DataInfluenceConstructor(
NaiveInfluenceFunction,
layers=(
["module.linear1"]
if use_gpu == "cuda_dataparallel"
if gpu_setting == "cuda_dataparallel"
else ["linear1"]
),
show_progress=False,
Expand All @@ -364,7 +377,7 @@ def test_compare_implementations_trained_NN_model_and_data(
ArnoldiInfluenceFunction,
layers=(
["module.linear1"]
if use_gpu == "cuda_dataparallel"
if gpu_setting == "cuda_dataparallel"
else ["linear1"]
),
show_progress=False,
@@ -397,7 +410,7 @@ def test_compare_implementations_random_model_and_data(
delta: float,
mode: str,
unpack_inputs: bool,
use_gpu: Union[bool, str],
gpu_setting: Optional[str],
) -> None:
"""
this compares 2 influence implementations on a trained 2-layer NN model.
@@ -412,7 +425,7 @@ def test_compare_implementations_random_model_and_data(
delta,
mode,
unpack_inputs,
use_gpu,
gpu_setting,
)

def _test_compare_implementations(
@@ -423,7 +436,7 @@ def _test_compare_implementations(
delta: float,
mode: str,
unpack_inputs: bool,
use_gpu: Union[bool, str],
gpu_setting: Optional[str],
) -> None:
"""
checks that 2 implementations of `InfluenceFunctionBase` return the same
@@ -444,13 +457,14 @@ def _test_compare_implementations(
tmpdir,
unpack_inputs,
return_test_data=True,
use_gpu=use_gpu,
gpu_setting=gpu_setting,
return_hessian_data=True,
model_type=model_type,
)

train_dataset = DataLoader(train_dataset, batch_size=5)

use_gpu = is_gpu(gpu_setting)
hessian_dataset = (
ExplicitDataset(hessian_samples, hessian_labels, use_gpu)
if not unpack_inputs
16 changes: 9 additions & 7 deletions tests/influence/_core/test_naive_influence.py
@@ -1,5 +1,5 @@
import tempfile
from typing import Callable, List, Tuple, Union
from typing import Callable, List, Optional, Tuple

import torch

@@ -21,9 +21,10 @@
DataInfluenceConstructor,
ExplicitDataset,
get_random_model_and_data,
GPU_SETTING_LIST,
is_gpu,
Linear,
UnpackDataset,
USE_GPU_LIST,
)
from torch.utils.data import DataLoader

@@ -59,17 +60,17 @@ def test_flatten_unflattener(self, param_shapes: List[Tuple[int, ...]]) -> None:
delta,
mode,
unpack_inputs,
use_gpu,
gpu_setting,
)
for reduction in ["none", "sum", "mean"]
for use_gpu in USE_GPU_LIST
for gpu_setting in GPU_SETTING_LIST
for (influence_constructor, delta) in [
(
DataInfluenceConstructor(
NaiveInfluenceFunction,
layers=(
["module.linear"]
if use_gpu == "cuda_dataparallel"
if gpu_setting == "cuda_dataparallel"
else ["linear"]
),
projection_dim=None,
@@ -109,7 +110,7 @@ def test_matches_linear_regression(
delta: float,
mode: str,
unpack_inputs: bool,
use_gpu: Union[bool, str],
gpu_setting: Optional[str],
) -> None:
"""
this tests that `NaiveInfluence`, the simplest implementation, agree with the
@@ -129,13 +130,14 @@ def test_matches_linear_regression(
tmpdir,
unpack_inputs,
return_test_data=True,
use_gpu=use_gpu,
gpu_setting=gpu_setting,
return_hessian_data=True,
model_type="trained_linear",
)

train_dataset = DataLoader(train_dataset, batch_size=5)

use_gpu = is_gpu(gpu_setting)
hessian_dataset = (
ExplicitDataset(hessian_samples, hessian_labels, use_gpu)
if not unpack_inputs
23 changes: 9 additions & 14 deletions tests/influence/_core/test_tracin_k_most_influential.py
@@ -1,5 +1,5 @@
import tempfile
from typing import Callable, Union
from typing import Callable, Optional

import torch
import torch.nn as nn
@@ -13,22 +13,17 @@
build_test_name_func,
DataInfluenceConstructor,
get_random_model_and_data,
GPU_SETTING_LIST,
is_gpu,
)


class TestTracInGetKMostInfluential(BaseTest):

use_gpu_list = (
[False, "cuda", "cuda_data_parallel"]
if torch.cuda.is_available() and torch.cuda.device_count() != 0
else [False]
)

param_list = []
for batch_size, k in [(4, 7), (7, 4), (40, 5), (5, 40), (40, 45)]:
for unpack_inputs in [True, False]:
for proponents in [True, False]:
for use_gpu in use_gpu_list:
for gpu_setting in GPU_SETTING_LIST:
for reduction, constr, aggregate in [
(
"none",
@@ -51,7 +46,7 @@ class TestTracInGetKMostInfluential(BaseTest):
name="linear2",
layers=(
["module.linear2"]
if use_gpu == "cuda_data_parallel"
if gpu_setting == "cuda_data_parallel"
else ["linear2"]
),
),
@@ -61,7 +56,7 @@ class TestTracInGetKMostInfluential(BaseTest):
if not (
"sample_wise_grads_per_batch" in constr.kwargs
and constr.kwargs["sample_wise_grads_per_batch"]
and use_gpu
and is_gpu(gpu_setting)
):
param_list.append(
(
@@ -71,7 +66,7 @@ class TestTracInGetKMostInfluential(BaseTest):
proponents,
batch_size,
k,
use_gpu,
gpu_setting,
aggregate,
)
)
@@ -88,7 +83,7 @@ def test_tracin_k_most_influential(
proponents: bool,
batch_size: int,
k: int,
use_gpu: Union[bool, str],
gpu_setting: Optional[str],
aggregate: bool,
) -> None:
"""
@@ -107,7 +102,7 @@ def test_tracin_k_most_influential(
tmpdir,
unpack_inputs,
True,
use_gpu,
gpu_setting,
)

self.assertTrue(isinstance(reduction, str))