meta-pytorch · 99warriors · Dec 19, 2022
diff --git a/captum/influence/_core/influence.py b/captum/influence/_core/influence.py
@@ -12,21 +12,19 @@ class DataInfluence(ABC):
     An abstract class to define model data influence skeleton.
     """
 
-    def __init_(
-        self, model: Module, influence_src_dataset: Dataset, **kwargs: Any
-    ) -> None:
+    def __init_(self, model: Module, train_dataset: Dataset, **kwargs: Any) -> None:
         r"""
         Args:
             model (torch.nn.Module): An instance of pytorch model.
-            influence_src_dataset (torch.utils.data.Dataset): PyTorch Dataset that is
+            train_dataset (torch.utils.data.Dataset): PyTorch Dataset that is
                     used to create a PyTorch Dataloader to iterate over the dataset and
                     its labels. This is the dataset for which we will be seeking for
                     influential instances. In most cases this is the training dataset.
             **kwargs: Additional key-value arguments that are necessary for specific
                     implementation of `DataInfluence` abstract class.
         """
         self.model = model
-        self.influence_src_dataset = influence_src_dataset
+        self.train_dataset = train_dataset
 
     @abstractmethod
     def influence(self, inputs: Any = None, **kwargs: Any) -> Any:

diff --git a/captum/influence/_core/tracincp.py b/captum/influence/_core/tracincp.py
diff --git a/captum/influence/_core/tracincp_fast_rand_proj.py b/captum/influence/_core/tracincp_fast_rand_proj.py
diff --git a/captum/influence/_utils/common.py b/captum/influence/_utils/common.py
@@ -189,7 +189,6 @@ def _get_k_most_influential_helper(
     influence_src_dataloader: DataLoader,
     influence_batch_fn: Callable,
     inputs: Tuple[Any, ...],
-    targets: Optional[Tensor],
     k: int = 5,
     proponents: bool = True,
     show_progress: bool = False,
@@ -204,13 +203,12 @@ def _get_k_most_influential_helper(
         influence_src_dataloader (DataLoader): The DataLoader, representing training
                 data, for which we want to compute proponents / opponents.
         influence_batch_fn (Callable): A callable that will be called via
-                `influence_batch_fn(inputs, targets, batch)`, where `batch` is a batch
+                `influence_batch_fn(inputs, batch)`, where `batch` is a batch
                 in the `influence_src_dataloader` argument.
-        inputs (tuple[Any, ...]): A batch of examples. Does not represent labels,
-                which are passed as `targets`.
-        targets (Tensor, optional): If computing TracIn scores on a loss function,
-                these are the labels corresponding to the batch `inputs`.
-                Default: None
+        inputs (tuple[Any, ...]): This argument represents the test batch, and is a
+                single tuple of arbitrary elements whose last element is assumed to be
+                the labels for the batch. That is, `model(*inputs[0:-1])` produces the
+                output for `model`, and `inputs[-1]` are the labels, if any.
         k (int, optional): The number of proponents or opponents to return per test
                 instance.
                 Default: 5
@@ -272,7 +270,7 @@ def _get_k_most_influential_helper(
     for batch in influence_src_dataloader:
 
         # calculate tracin_scores for the batch
-        batch_tracin_scores = influence_batch_fn(inputs, targets, batch)
+        batch_tracin_scores = influence_batch_fn(inputs, batch)
         batch_tracin_scores *= multiplier
 
         # get the top-k indices and tracin_scores for the batch

diff --git a/tests/influence/_core/test_dataloader.py b/tests/influence/_core/test_dataloader.py
@@ -10,6 +10,7 @@
 from parameterized import parameterized
 from tests.helpers.basic import assertTensorAlmostEqual, BaseTest
 from tests.influence._utils.common import (
+    _format_batch_into_tuple,
     build_test_name_func,
     DataInfluenceConstructor,
     get_random_model_and_data,
@@ -76,7 +77,8 @@ def test_tracin_dataloader(
             )
 
             train_scores = tracin.influence(
-                test_samples, test_labels, k=None, unpack_inputs=unpack_inputs
+                _format_batch_into_tuple(test_samples, test_labels, unpack_inputs),
+                k=None,
             )
 
             tracin_dataloader = tracin_constructor(
@@ -88,7 +90,8 @@ def test_tracin_dataloader(
             )
 
             train_scores_dataloader = tracin_dataloader.influence(
-                test_samples, test_labels, k=None, unpack_inputs=unpack_inputs
+                _format_batch_into_tuple(test_samples, test_labels, unpack_inputs),
+                k=None,
             )
 
             assertTensorAlmostEqual(

diff --git a/tests/influence/_core/test_tracin_intermediate_quantities.py b/tests/influence/_core/test_tracin_intermediate_quantities.py
@@ -12,6 +12,7 @@
 from parameterized import parameterized
 from tests.helpers.basic import assertTensorAlmostEqual, BaseTest
 from tests.influence._utils.common import (
+    _format_batch_into_tuple,
     build_test_name_func,
     DataInfluenceConstructor,
     get_random_model_and_data,
@@ -224,25 +225,13 @@ def test_tracin_intermediate_quantities_consistent(
             )
 
             # compute influence scores without using `compute_intermediate_quantities`
+            test_batch = _format_batch_into_tuple(
+                test_features, test_labels, unpack_inputs
+            )
             scores = tracin.influence(
-                test_features, test_labels, unpack_inputs=unpack_inputs
+                test_batch,
             )
 
-            # compute influence scores using `compute_intermediate_quantities`
-            # we combine `test_features` and `test_labels` into a single tuple
-            # `test_batch` to pass to the model, with the assumption that
-            # `model(test_batch[0:-1]` produces the predictions, and `test_batch[-1]`
-            # are the labels.  We do this due to the assumptions made by the
-            # `compute_intermediate_quantities` method. Therefore, how we
-            # form `test_batch` depends on whether `unpack_inputs` is True or False
-            if not unpack_inputs:
-                # `test_features` is a Tensor
-                test_batch = (test_features, test_labels)
-            else:
-                # `test_features` is a tuple, so we unpack it to place in tuple,
-                # along with `test_labels`
-                test_batch = (*test_features, test_labels)  # type: ignore[assignment]
-
             # the influence score is the dot product of intermediate quantities
             intermediate_quantities_scores = torch.matmul(
                 intermediate_quantities_tracin.compute_intermediate_quantities(

diff --git a/tests/influence/_core/test_tracin_k_most_influential.py b/tests/influence/_core/test_tracin_k_most_influential.py
@@ -8,6 +8,7 @@
 from parameterized import parameterized
 from tests.helpers.basic import assertTensorAlmostEqual, BaseTest
 from tests.influence._utils.common import (
+    _format_batch_into_tuple,
     build_test_name_func,
     DataInfluenceConstructor,
     get_random_model_and_data,
@@ -107,15 +108,14 @@ def test_tracin_k_most_influential(
             )
 
             train_scores = tracin.influence(
-                test_samples, test_labels, k=None, unpack_inputs=unpack_inputs
+                _format_batch_into_tuple(test_samples, test_labels, unpack_inputs),
+                k=None,
             )
             sort_idx = torch.argsort(train_scores, dim=1, descending=proponents)[:, 0:k]
             idx, _train_scores = tracin.influence(
-                test_samples,
-                test_labels,
+                _format_batch_into_tuple(test_samples, test_labels, unpack_inputs),
                 k=k,
                 proponents=proponents,
-                unpack_inputs=unpack_inputs,
             )
             for i in range(len(idx)):
                 # check that idx[i] is correct

diff --git a/tests/influence/_core/test_tracin_regression.py b/tests/influence/_core/test_tracin_regression.py
@@ -183,19 +183,19 @@ def test_tracin_regression(
                     criterion,
                 )
 
-                train_scores = tracin.influence(train_inputs, train_labels)
+                train_scores = tracin.influence((train_inputs, train_labels))
                 idx, _ = tracin.influence(
-                    train_inputs, train_labels, k=len(dataset), proponents=True
+                    (train_inputs, train_labels), k=len(dataset), proponents=True
                 )
                 # check that top influence is one with maximal value
                 # (and hence gradient)
                 for i in range(len(idx)):
                     self.assertEqual(idx[i][0], 15)
 
                 # check influence scores of test data
-                test_scores = tracin.influence(test_inputs, test_labels)
+                test_scores = tracin.influence((test_inputs, test_labels))
                 idx, _ = tracin.influence(
-                    test_inputs, test_labels, k=len(test_inputs), proponents=True
+                    (test_inputs, test_labels), k=len(test_inputs), proponents=True
                 )
                 # check that top influence is one with maximal value
                 # (and hence gradient)
@@ -226,17 +226,17 @@ def test_tracin_regression(
                     sample_wise_grads_per_batch=True,
                 )
 
-                train_scores = tracin.influence(train_inputs, train_labels)
+                train_scores = tracin.influence((train_inputs, train_labels))
                 train_scores_sample_wise_trick = tracin_sample_wise_trick.influence(
-                    train_inputs, train_labels
+                    (train_inputs, train_labels)
                 )
                 assertTensorAlmostEqual(
                     self, train_scores, train_scores_sample_wise_trick
                 )
 
-                test_scores = tracin.influence(test_inputs, test_labels)
+                test_scores = tracin.influence((test_inputs, test_labels))
                 test_scores_sample_wise_trick = tracin_sample_wise_trick.influence(
-                    test_inputs, test_labels
+                    (test_inputs, test_labels)
                 )
                 assertTensorAlmostEqual(
                     self, test_scores, test_scores_sample_wise_trick
@@ -288,7 +288,7 @@ def test_tracin_regression_1D_numerical(
                 criterion,
             )
 
-            train_scores = tracin.influence(train_inputs, train_labels, k=None)
+            train_scores = tracin.influence((train_inputs, train_labels), k=None)
 
             r"""
             Derivation for gradient / resulting TracIn score:
@@ -382,9 +382,9 @@ def test_tracin_identity_regression(
 
                 # check influence scores of training data
 
-                train_scores = tracin.influence(train_inputs, train_labels)
+                train_scores = tracin.influence((train_inputs, train_labels))
                 idx, _ = tracin.influence(
-                    train_inputs, train_labels, k=len(dataset), proponents=True
+                    (train_inputs, train_labels), k=len(dataset), proponents=True
                 )
 
                 # check that top influence for an instance is itself
@@ -415,9 +415,9 @@ def test_tracin_identity_regression(
                     sample_wise_grads_per_batch=True,
                 )
 
-                train_scores = tracin.influence(train_inputs, train_labels)
+                train_scores = tracin.influence((train_inputs, train_labels))
                 train_scores_tracin_sample_wise_trick = (
-                    tracin_sample_wise_trick.influence(train_inputs, train_labels)
+                    tracin_sample_wise_trick.influence((train_inputs, train_labels))
                 )
                 assertTensorAlmostEqual(
                     self, train_scores, train_scores_tracin_sample_wise_trick
@@ -496,5 +496,5 @@ def test_loss_fn(input, target):
             )
 
             # check influence scores of training data. they should all be 0
-            train_scores = tracin.influence(train_inputs, train_labels, k=None)
+            train_scores = tracin.influence((train_inputs, train_labels), k=None)
             assertTensorAlmostEqual(self, train_scores, torch.zeros(train_scores.shape))
diff --git a/tests/influence/_core/test_tracin_self_influence.py b/tests/influence/_core/test_tracin_self_influence.py
@@ -8,6 +8,7 @@
 from parameterized import parameterized
 from tests.helpers.basic import assertTensorAlmostEqual, BaseTest
 from tests.influence._utils.common import (
+    _format_batch_into_tuple,
     build_test_name_func,
     DataInfluenceConstructor,
     get_random_model_and_data,
@@ -108,10 +109,10 @@ def test_tracin_self_influence(
                 criterion,
             )
             train_scores = tracin.influence(
-                train_dataset.samples,
-                train_dataset.labels,
+                _format_batch_into_tuple(
+                    train_dataset.samples, train_dataset.labels, unpack_inputs
+                ),
                 k=None,
-                unpack_inputs=unpack_inputs,
             )
             # calculate self_tracin_scores
             self_tracin_scores = tracin.self_influence(

diff --git a/tests/influence/_core/test_tracin_show_progress.py b/tests/influence/_core/test_tracin_show_progress.py
@@ -178,8 +178,7 @@ def test_tracin_show_progress(
                 elif mode == "influence":
 
                     tracin.influence(
-                        test_samples,
-                        test_labels,
+                        (test_samples, test_labels),
                         k=None,
                         show_progress=True,
                     )
@@ -196,8 +195,7 @@ def test_tracin_show_progress(
                 elif mode == "k-most":
 
                     tracin.influence(
-                        test_samples,
-                        test_labels,
+                        (test_samples, test_labels),
                         k=2,
                         proponents=True,
                         show_progress=True,
@@ -218,8 +216,7 @@ def test_tracin_show_progress(
                     mock_stderr.truncate(0)
 
                     tracin.influence(
-                        test_samples,
-                        test_labels,
+                        (test_samples, test_labels),
                         k=2,
                         proponents=False,
                         show_progress=True,

diff --git a/tests/influence/_core/test_tracin_validation.py b/tests/influence/_core/test_tracin_validation.py
@@ -63,4 +63,4 @@ def test_tracin_require_inputs_dataset(
                 batch_size=1,
             )
             with self.assertRaisesRegex(AssertionError, "required."):
-                tracin.influence(None, test_labels, k=None, unpack_inputs=False)
+                tracin.influence(None, k=None)
diff --git a/tests/influence/_core/test_tracin_xor.py b/tests/influence/_core/test_tracin_xor.py
@@ -258,7 +258,7 @@ def test_tracin_xor(
                     batch_size,
                     criterion,
                 )
-                test_scores = tracin.influence(testset, testlabels)
+                test_scores = tracin.influence((testset, testlabels))
                 idx = torch.argsort(test_scores, dim=1, descending=True)
                 # check that top 5 influences have matching binary classification
                 for i in range(len(idx)):
@@ -288,9 +288,9 @@ def test_tracin_xor(
                     criterion,
                     sample_wise_grads_per_batch=True,
                 )
-                test_scores = tracin.influence(testset, testlabels)
+                test_scores = tracin.influence((testset, testlabels))
                 test_scores_sample_wise_trick = tracin_sample_wise_trick.influence(
-                    testset, testlabels
+                    (testset, testlabels)
                 )
                 assertTensorAlmostEqual(
                     self, test_scores, test_scores_sample_wise_trick

diff --git a/tests/influence/_utils/common.py b/tests/influence/_utils/common.py
@@ -2,7 +2,7 @@
 import os
 import unittest
 from functools import partial
-from typing import Callable, Iterator, List, Optional, Union
+from typing import Callable, Iterator, List, Optional, Tuple, Union
 
 import torch
 import torch.nn as nn
@@ -14,6 +14,7 @@
 )
 from parameterized import parameterized
 from parameterized.parameterized import param
+from torch import Tensor
 from torch.nn import Module
 from torch.utils.data import DataLoader, Dataset
 
@@ -366,3 +367,12 @@ def build_test_name_func(args_to_skip: Optional[List[str]] = None):
     """
 
     return partial(generate_test_name, args_to_skip=args_to_skip)
+
+
+def _format_batch_into_tuple(
+    inputs: Union[Tuple, Tensor], targets: Tensor, unpack_inputs: bool
+):  # test helper: merges features and labels into the one tuple given to `influence`
+    if unpack_inputs:  # splat `inputs` so its elements come before `targets`
+        return (*inputs, targets)
+    else:  # keep `inputs` as a single element, giving `(inputs, targets)`
+        return (inputs, targets)