Commit 3f6d0ac

Add the ability to use sample weights in the membership attack models, where they are supported by the underlying scikit-learn estimators. Only the logistic regression and random forest estimators support sample weights.
PiperOrigin-RevId: 478542133
1 parent feddd28 commit 3f6d0ac

15 files changed: +552 -78 lines
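For context, both supported estimators expose per-example weights through their scikit-learn `fit` methods. The sketch below is illustrative only, not this library's trainer code; `features`, `membership_labels`, and `weights` are made-up names:

# Illustrative sketch only, not the library's code: both supported
# scikit-learn estimators accept per-example weights in fit().
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

features = np.random.rand(100, 1)                 # e.g. per-example losses
membership_labels = np.random.randint(0, 2, 100)  # 1 = training member
weights = np.random.rand(100)                     # per-example sample weights

for clf in (LogisticRegression(), RandomForestClassifier(n_estimators=10)):
  clf.fit(features, membership_labels, sample_weight=weights)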

tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/advanced_mia.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
"""Functions for advanced membership inference attacks."""
1515

1616
import functools
17-
from typing import Sequence, Union
17+
from typing import Optional, Sequence, Union
1818
import numpy as np
1919
import scipy.stats
2020
from tensorflow_privacy.privacy.privacy_tests.utils import log_loss
@@ -197,6 +197,7 @@ def convert_logit_to_prob(logit: np.ndarray) -> np.ndarray:
197197

198198
def calculate_statistic(pred: np.ndarray,
199199
labels: np.ndarray,
200+
sample_weight: Optional[np.ndarray] = None,
200201
is_logits: bool = True,
201202
option: str = 'logit',
202203
small_value: float = 1e-45):
@@ -215,6 +216,10 @@ def calculate_statistic(pred: np.ndarray,
215216
An array of size n by c where n is the number of samples and c is the
216217
number of classes
217218
labels: true labels of samples (integer valued)
219+
sample_weight: a vector of weights of shape (num_samples, ) that are
220+
assigned to individual samples. If not provided, then each sample is
221+
given unit weight. Only the LogisticRegressionAttacker and the
222+
RandomForestAttacker support sample weights.
218223
is_logits: whether pred is logits or probability vectors
219224
option: confidence using probability, xe loss, logit of confidence,
220225
confidence using logits, hinge loss
@@ -241,7 +246,7 @@ def calculate_statistic(pred: np.ndarray,
241246
if option in ['conf with prob', 'conf with logit']:
242247
return pred[range(n), labels]
243248
if option == 'xe':
244-
return log_loss(labels, pred)
249+
return log_loss(labels, pred, sample_weight=sample_weight)
245250
if option == 'logit':
246251
p_true = pred[range(n), labels]
247252
pred[range(n), labels] = 0
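A hedged usage sketch of the new signature, with inputs copied from the unit tests below. In the hunks shown, only the 'xe' branch consumes the weights, scaling each per-example cross-entropy loss:

# Sketch of the new calculate_statistic signature; inputs mirror the tests.
import numpy as np
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import advanced_mia as amia

logits = np.array([[1., 2., -3.], [-1., 1., 0.]])
labels = np.array([1, 2])
sample_weight = np.array([1.0, 0.5])

# Each per-example cross-entropy loss is scaled by its weight, so the
# second value is halved relative to the unweighted statistic.
stat = amia.calculate_statistic(
    logits, labels, sample_weight=sample_weight, is_logits=True, option='xe')
print(stat)  # approximately [0.31817543, 0.70380298]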

tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/advanced_mia_example.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
import functools
1717
import gc
1818
import os
19+
from typing import Optional
20+
1921
from absl import app
2022
from absl import flags
2123
import matplotlib.pyplot as plt
@@ -69,7 +71,11 @@ def plot_curve_with_area(x, y, xlabel, ylabel, ax, label, title=None):
6971
ax.title.set_text(title)
7072

7173

72-
def get_stat_and_loss_aug(model, x, y, batch_size=4096):
74+
def get_stat_and_loss_aug(model,
75+
x,
76+
y,
77+
sample_weight: Optional[np.ndarray] = None,
78+
batch_size=4096):
7379
"""A helper function to get the statistics and losses.
7480
7581
Here we get the statistics and losses for the original and
@@ -80,6 +86,10 @@ def get_stat_and_loss_aug(model, x, y, batch_size=4096):
8086
model: model to make prediction
8187
x: samples
8288
y: true labels of samples (integer valued)
89+
sample_weight: a vector of weights of shape (n_samples, ) that are
90+
assigned to individual samples. If not provided, then each sample is
91+
given unit weight. Only the LogisticRegressionAttacker and the
92+
RandomForestAttacker support sample weights.
8393
batch_size: the batch size for model.predict
8494
8595
Returns:
@@ -89,8 +99,10 @@ def get_stat_and_loss_aug(model, x, y, batch_size=4096):
8999
for data in [x, x[:, :, ::-1, :]]:
90100
prob = amia.convert_logit_to_prob(
91101
model.predict(data, batch_size=batch_size))
92-
losses.append(utils.log_loss(y, prob))
93-
stat.append(amia.calculate_statistic(prob, y, convert_to_prob=False))
102+
losses.append(utils.log_loss(y, prob, sample_weight=sample_weight))
103+
stat.append(
104+
amia.calculate_statistic(
105+
prob, y, sample_weight=sample_weight, convert_to_prob=False))
94106
return np.vstack(stat).transpose(1, 0), np.vstack(losses).transpose(1, 0)
95107

96108

@@ -103,6 +115,8 @@ def main(unused_argv):
103115

104116
# Load data.
105117
x, y = load_cifar10()
118+
# Sample weights are set to `None` by default, but can be changed here.
119+
sample_weight = None
106120
n = x.shape[0]
107121

108122
# Train the target and shadow models. We will use one of the model in `models`
@@ -144,7 +158,7 @@ def main(unused_argv):
144158
print(f'Trained model #{i} with {in_indices[-1].sum()} examples.')
145159

146160
# Get the statistics of the current model.
147-
s, l = get_stat_and_loss_aug(model, x, y)
161+
s, l = get_stat_and_loss_aug(model, x, y, sample_weight)
148162
stat.append(s)
149163
losses.append(l)
150164

@@ -175,7 +189,9 @@ def main(unused_argv):
175189
stat_target, stat_in, stat_out, fix_variance=True)
176190
attack_input = AttackInputData(
177191
loss_train=scores[in_indices_target],
178-
loss_test=scores[~in_indices_target])
192+
loss_test=scores[~in_indices_target],
193+
sample_weight_train=sample_weight,
194+
sample_weight_test=sample_weight)
179195
result_lira = mia.run_attacks(attack_input).single_attack_results[0]
180196
print('Advanced MIA attack with Gaussian:',
181197
f'auc = {result_lira.get_auc():.4f}',
@@ -187,7 +203,9 @@ def main(unused_argv):
187203
scores = -amia.compute_score_offset(stat_target, stat_in, stat_out)
188204
attack_input = AttackInputData(
189205
loss_train=scores[in_indices_target],
190-
loss_test=scores[~in_indices_target])
206+
loss_test=scores[~in_indices_target],
207+
sample_weight_train=sample_weight,
208+
sample_weight_test=sample_weight)
191209
result_offset = mia.run_attacks(attack_input).single_attack_results[0]
192210
print('Advanced MIA attack with offset:',
193211
f'auc = {result_offset.get_auc():.4f}',
@@ -197,7 +215,9 @@ def main(unused_argv):
197215
loss_target = losses[idx][:, 0]
198216
attack_input = AttackInputData(
199217
loss_train=loss_target[in_indices_target],
200-
loss_test=loss_target[~in_indices_target])
218+
loss_test=loss_target[~in_indices_target],
219+
sample_weight_train=sample_weight,
220+
sample_weight_test=sample_weight)
201221
result_baseline = mia.run_attacks(attack_input).single_attack_results[0]
202222
print('Baseline MIA attack:', f'auc = {result_baseline.get_auc():.4f}',
203223
f'adv = {result_baseline.get_attacker_advantage():.4f}')
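In the example script `sample_weight` stays `None`, so passing the same (null) value to both `sample_weight_train` and `sample_weight_test` is safe. With a real weight vector it would presumably need slicing by membership so shapes line up, as in this hedged sketch with stand-in arrays (not the script's data):

# Hedged sketch with stand-in data; with non-null weights, slice them the
# same way the scores are sliced by the membership mask.
import numpy as np
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData

scores = np.random.rand(100)                   # stand-in for the MIA scores
in_indices_target = np.random.rand(100) < 0.5  # stand-in membership mask
sample_weight = np.ones(100)                   # unit weights, for illustration

attack_input = AttackInputData(
    loss_train=scores[in_indices_target],
    loss_test=scores[~in_indices_target],
    sample_weight_train=sample_weight[in_indices_target],
    sample_weight_test=sample_weight[~in_indices_target])
result = mia.run_attacks(attack_input).single_attack_results[0]
print(f'auc = {result.get_auc():.4f}')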

tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/advanced_mia_test.py

Lines changed: 67 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -158,19 +158,21 @@ def test_calculate_statistic_logit(self):
158158
# [0.09003057, 0.66524096, 0.24472847]])
159159
labels = np.array([1, 2])
160160

161-
stat = amia.calculate_statistic(logit, labels, is_logits, 'conf with prob')
161+
stat = amia.calculate_statistic(logit, labels, None, is_logits,
162+
'conf with prob')
162163
np.testing.assert_allclose(stat, np.array([0.72747516, 0.24472847]))
163164

164-
stat = amia.calculate_statistic(logit, labels, is_logits, 'xe')
165+
stat = amia.calculate_statistic(logit, labels, None, is_logits, 'xe')
165166
np.testing.assert_allclose(stat, np.array([0.31817543, 1.40760596]))
166167

167-
stat = amia.calculate_statistic(logit, labels, is_logits, 'logit')
168+
stat = amia.calculate_statistic(logit, labels, None, is_logits, 'logit')
168169
np.testing.assert_allclose(stat, np.array([0.98185009, -1.12692802]))
169170

170-
stat = amia.calculate_statistic(logit, labels, is_logits, 'conf with logit')
171+
stat = amia.calculate_statistic(logit, labels, None, is_logits,
172+
'conf with logit')
171173
np.testing.assert_allclose(stat, np.array([2, 0.]))
172174

173-
stat = amia.calculate_statistic(logit, labels, is_logits, 'hinge')
175+
stat = amia.calculate_statistic(logit, labels, None, is_logits, 'hinge')
174176
np.testing.assert_allclose(stat, np.array([1, -1.]))
175177

176178
def test_calculate_statistic_prob(self):
@@ -179,19 +181,74 @@ def test_calculate_statistic_prob(self):
179181
prob = np.array([[0.1, 0.85, 0.05], [0.1, 0.5, 0.4]])
180182
labels = np.array([1, 2])
181183

182-
stat = amia.calculate_statistic(prob, labels, is_logits, 'conf with prob')
184+
stat = amia.calculate_statistic(prob, labels, None, is_logits,
185+
'conf with prob')
183186
np.testing.assert_allclose(stat, np.array([0.85, 0.4]))
184187

185-
stat = amia.calculate_statistic(prob, labels, is_logits, 'xe')
188+
stat = amia.calculate_statistic(prob, labels, None, is_logits, 'xe')
186189
np.testing.assert_allclose(stat, np.array([0.16251893, 0.91629073]))
187190

188-
stat = amia.calculate_statistic(prob, labels, is_logits, 'logit')
191+
stat = amia.calculate_statistic(prob, labels, None, is_logits, 'logit')
189192
np.testing.assert_allclose(stat, np.array([1.73460106, -0.40546511]))
190193

191194
np.testing.assert_raises(ValueError, amia.calculate_statistic, prob, labels,
192-
is_logits, 'conf with logit')
195+
None, is_logits, 'conf with logit')
193196
np.testing.assert_raises(ValueError, amia.calculate_statistic, prob, labels,
194-
is_logits, 'hinge')
197+
None, is_logits, 'hinge')
198+
199+
def test_calculate_statistic_logit_with_sample_weights(self):
200+
"""Test calculate_statistic with input as logit."""
201+
is_logits = True
202+
logit = np.array([[1, 2, -3.], [-1, 1, 0]])
203+
# expected probability vector
204+
# array([[0.26762315, 0.72747516, 0.00490169],
205+
# [0.09003057, 0.66524096, 0.24472847]])
206+
labels = np.array([1, 2])
207+
sample_weight = np.array([1.0, 0.5])
208+
209+
stat = amia.calculate_statistic(logit, labels, sample_weight, is_logits,
210+
'conf with prob')
211+
np.testing.assert_allclose(stat, np.array([0.72747516, 0.24472847]))
212+
213+
stat = amia.calculate_statistic(logit, labels, sample_weight, is_logits,
214+
'xe')
215+
np.testing.assert_allclose(stat, np.array([0.31817543, 0.70380298]))
216+
217+
stat = amia.calculate_statistic(logit, labels, sample_weight, is_logits,
218+
'logit')
219+
np.testing.assert_allclose(stat, np.array([0.98185009, -1.12692802]))
220+
221+
stat = amia.calculate_statistic(logit, labels, sample_weight, is_logits,
222+
'conf with logit')
223+
np.testing.assert_allclose(stat, np.array([2, 0.]))
224+
225+
stat = amia.calculate_statistic(logit, labels, sample_weight, is_logits,
226+
'hinge')
227+
np.testing.assert_allclose(stat, np.array([1, -1.]))
228+
229+
def test_calculate_statistic_prob_with_sample_weights(self):
230+
"""Test calculate_statistic with input as probability vector."""
231+
is_logits = False
232+
prob = np.array([[0.1, 0.85, 0.05], [0.1, 0.5, 0.4]])
233+
labels = np.array([1, 2])
234+
sample_weight = np.array([1.0, 0.5])
235+
236+
stat = amia.calculate_statistic(prob, labels, sample_weight, is_logits,
237+
'conf with prob')
238+
np.testing.assert_allclose(stat, np.array([0.85, 0.4]))
239+
240+
stat = amia.calculate_statistic(prob, labels, sample_weight, is_logits,
241+
'xe')
242+
np.testing.assert_allclose(stat, np.array([0.16251893, 0.458145365]))
243+
244+
stat = amia.calculate_statistic(prob, labels, sample_weight, is_logits,
245+
'logit')
246+
np.testing.assert_allclose(stat, np.array([1.73460106, -0.40546511]))
247+
248+
np.testing.assert_raises(ValueError, amia.calculate_statistic, prob, labels,
249+
None, is_logits, 'conf with logit')
250+
np.testing.assert_raises(ValueError, amia.calculate_statistic, prob, labels,
251+
None, is_logits, 'hinge')
195252

196253

197254
if __name__ == '__main__':
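Sanity check on the weighted 'xe' expectations above: each weighted value is the unweighted per-example loss from the earlier tests multiplied by its weight, e.g. 1.40760596 × 0.5 = 0.70380298.

# Quick arithmetic check of the weighted 'xe' test values (plain numpy).
import numpy as np

weights = np.array([1.0, 0.5])
np.testing.assert_allclose(
    np.array([0.31817543, 1.40760596]) * weights,  # unweighted logit-test xe
    [0.31817543, 0.70380298])
np.testing.assert_allclose(
    np.array([0.16251893, 0.91629073]) * weights,  # unweighted prob-test xe
    [0.16251893, 0.458145365])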

tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import logging
2121
import os
2222
import pickle
23-
from typing import Any, Callable, Iterable, MutableSequence, Optional, Union
23+
from typing import Any, Iterable, MutableSequence, Optional, Union
2424

2525
import numpy as np
2626
import pandas as pd
@@ -203,6 +203,10 @@ class AttackInputData:
203203
labels_train: Optional[np.ndarray] = None
204204
labels_test: Optional[np.ndarray] = None
205205

206+
# Sample weights, if provided.
207+
sample_weight_train: Optional[np.ndarray] = None
208+
sample_weight_test: Optional[np.ndarray] = None
209+
206210
# Explicitly specified loss. If provided, this is used instead of deriving
207211
# loss from logits and labels
208212
loss_train: Optional[np.ndarray] = None
@@ -219,8 +223,7 @@ class AttackInputData:
219223
# string representation, or a callable.
220224
# If a callable is provided, it should take in two argument, the 1st is
221225
# labels, the 2nd is logits or probs.
222-
loss_function: Union[Callable[[np.ndarray, np.ndarray], np.ndarray], str,
223-
utils.LossFunction] = utils.LossFunction.CROSS_ENTROPY
226+
loss_function: utils.LossFunctionCallable = utils.LossFunction.CROSS_ENTROPY
224227
# Whether `loss_function` will be called with logits or probs. If not set
225228
# (None), will decide by availablity of logits and probs and logits is
226229
# preferred when both are available.
@@ -309,7 +312,8 @@ def get_loss_train(self):
309312
self.loss_function_using_logits = (self.logits_train is not None)
310313
return utils.get_loss(self.loss_train, self.labels_train, self.logits_train,
311314
self.probs_train, self.loss_function,
312-
self.loss_function_using_logits, self.multilabel_data)
315+
self.loss_function_using_logits, self.multilabel_data,
316+
self.sample_weight_train)
313317

314318
def get_loss_test(self):
315319
"""Calculates (if needed) cross-entropy losses for the test set.
@@ -321,7 +325,8 @@ def get_loss_test(self):
321325
self.loss_function_using_logits = bool(self.logits_test)
322326
return utils.get_loss(self.loss_test, self.labels_test, self.logits_test,
323327
self.probs_test, self.loss_function,
324-
self.loss_function_using_logits, self.multilabel_data)
328+
self.loss_function_using_logits, self.multilabel_data,
329+
self.sample_weight_test)
325330

326331
def get_entropy_train(self):
327332
"""Calculates prediction entropy for the training set."""
@@ -367,6 +372,11 @@ def get_test_size(self):
367372
"""Returns the number of examples of the test set."""
368373
return self.get_test_shape()[0]
369374

375+
def has_nonnull_sample_weights(self):
376+
"""Whether both the train and test input data have sample weights."""
377+
return (self.sample_weight_train is not None and
378+
self.sample_weight_test is not None)
379+
370380
def is_multihot_labels(self, arr, arr_name) -> bool:
371381
"""Check if the 2D array is multihot, with values in [0, 1].
372382
@@ -556,6 +566,8 @@ def __str__(self):
556566
_append_array_shape(self.probs_test, 'probs_test', result)
557567
_append_array_shape(self.labels_train, 'labels_train', result)
558568
_append_array_shape(self.labels_test, 'labels_test', result)
569+
_append_array_shape(self.sample_weight_train, 'sample_weight_train', result)
570+
_append_array_shape(self.sample_weight_test, 'sample_weight_test', result)
559571
result.append(')')
560572
return '\n'.join(result)
561573
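A minimal sketch of the new fields and helper on AttackInputData (illustrative values):

# Minimal sketch of the new AttackInputData fields (illustrative values).
import numpy as np
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData

attack_input = AttackInputData(
    loss_train=np.array([0.1, 0.2]),
    loss_test=np.array([0.3, 0.4]),
    sample_weight_train=np.array([1.0, 0.5]),
    sample_weight_test=np.array([1.0, 1.0]))
assert attack_input.has_nonnull_sample_weights()  # both weight fields set
print(attack_input)  # __str__ now reports the sample weight shapes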
