From ad814d9ff512c01a29be0006d0d5897d166456e6 Mon Sep 17 00:00:00 2001
From: dPys
Date: Sat, 9 Nov 2019 15:48:57 -0600
Subject: [PATCH 1/4] enh: add a data-driven *b = 0* estimator

---
 dmriprep/interfaces/vectors.py       |  4 +-
 dmriprep/utils/tests/test_vectors.py |  5 ++
 dmriprep/utils/vectors.py            | 85 ++++++++++++++++++++++++++++
 3 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/dmriprep/interfaces/vectors.py b/dmriprep/interfaces/vectors.py
index 1329bd62..1ebb9f96 100644
--- a/dmriprep/interfaces/vectors.py
+++ b/dmriprep/interfaces/vectors.py
@@ -21,6 +21,7 @@ class _CheckGradientTableInputSpec(BaseInterfaceInputSpec):
     b0_threshold = traits.Float(B0_THRESHOLD, usedefault=True)
     bvec_norm_epsilon = traits.Float(BVEC_NORM_EPSILON, usedefault=True)
     b_scale = traits.Bool(True, usedefault=True)
+    image_consistency = traits.Bool(False, usedefault=True)


 class _CheckGradientTableOutputSpec(TraitedSpec):
@@ -75,8 +76,9 @@ def _run_interface(self, runtime):
             bvals=_undefined(self.inputs, "in_bval"),
             rasb_file=rasb_file,
             b_scale=self.inputs.b_scale,
+            image_consistency=self.inputs.image_consistency,
             bvec_norm_epsilon=self.inputs.bvec_norm_epsilon,
-            b0_threshold=self.inputs.b0_threshold,
+            b0_threshold=self.inputs.b0_threshold
         )
         pole = table.pole
         self._results["pole"] = tuple(pole)
diff --git a/dmriprep/utils/tests/test_vectors.py b/dmriprep/utils/tests/test_vectors.py
index f6afcf6c..0e3e4aaf 100644
--- a/dmriprep/utils/tests/test_vectors.py
+++ b/dmriprep/utils/tests/test_vectors.py
@@ -86,6 +86,11 @@ def test_corruption(tmpdir, dipy_test_data, monkeypatch):
     assert -1.0 <= np.max(np.abs(dgt.gradients[..., :-1])) <= 1.0
     assert dgt.normalized is True

+    # Test image gradient consistency
+    dgt = v.DiffusionGradientTable(dwi_file=dipy_test_data['dwi_file'],
+                                   bvals=bvals, bvecs=bvecs)
+    assert dgt.gradient_consistency is None
+
     def mock_func(*args, **kwargs):
         return "called!"

diff --git a/dmriprep/utils/vectors.py b/dmriprep/utils/vectors.py
index 43041aaa..b7992be7 100644
--- a/dmriprep/utils/vectors.py
+++ b/dmriprep/utils/vectors.py
@@ -103,6 +103,7 @@ def __init__(

         if dwi_file is not None:
             self.affine = dwi_file
+            self._dwi_file = dwi_file
         if rasb_file is not None:
             self.gradients = rasb_file
         if self.affine is not None:
@@ -278,6 +279,18 @@ def to_filename(self, filename, filetype="rasb"):
         else:
             raise ValueError('Unknown filetype "%s"' % filetype)

+    # @property
+    # def gradient_consistency(self):
+    #     """
+    #     Check that b gradients and signal variation in dwi image are consistent with one another.
+    #     """
+    #     if (Path(self._dwi_file).exists() is True) and (self._image_consistency is True):
+    #         return image_gradient_consistency_check(str(self._dwi_file), self.bvecs, self.bvals,
+    #                                                 b0_threshold=self._b0_thres)
+    #     else:
+    #         raise FileNotFoundError(
+    #             "DWI file not found, and is required for checking image-gradient consistency.")
+

 def normalize_gradients(
     bvecs,
@@ -483,3 +496,75 @@ def bvecs2ras(affine, bvecs, norm=True, bvec_norm_epsilon=0.2):
     rotated_bvecs[~b0s] /= norms_bvecs[~b0s, np.newaxis]
     rotated_bvecs[b0s] = np.zeros(3)
     return rotated_bvecs
+
+
+def image_gradient_consistency_check(dwi_file, bvecs, bvals, b0_threshold=B0_THRESHOLD):
+    """
+    Check that gradient encoding patterns found in the b-values correspond to those
+    found in the signal intensity variance across volumes of the dwi image.
+
+    Parameters
+    ----------
+    dwi_file : str
+        Optionally provide a file path to the diffusion-weighted image series to which the
+        bvecs/bvals should correspond.
+    bvecs : m x n 2d array
+        B-vectors array.
+    bvals : 1d array
+        B-values float array.
+    b0_threshold : float
+        Gradient threshold below which volumes and vectors are considered B0's.
+    """
+    import nibabel as nib
+    from dipy.core.gradients import gradient_table_from_bvals_bvecs
+    from sklearn.cluster import MeanShift, estimate_bandwidth
+
+    # Build gradient table object
+    gtab = gradient_table_from_bvals_bvecs(bvals, bvecs, b0_threshold=b0_threshold)
+
+    # Check that number of image volumes corresponds to the number of gradient encodings.
+    volume_num = nib.load(dwi_file).shape[-1]
+    if not len(bvals) == volume_num:
+        raise Exception("Expected %d total image samples but found %d total gradient encoding values", volume_num,
+                        len(bvals))
+    # Evaluate B0 locations relative to mean signal variation.
+    data = np.array(nib.load(dwi_file).dataobj)
+    signal_means = []
+    for vol in range(data.shape[-1]):
+        signal_means.append(np.mean(data[:, :, :, vol]))
+    signal_b0_indices = np.where(signal_means > np.mean(signal_means)+3*np.std(signal_means))[0]
+
+    # Check B0 locations first
+    if not np.allclose(np.where(gtab.b0s_mask == True)[0], signal_b0_indices):
+        raise UserWarning('B0 indices in vectors do not correspond to relative high-signal contrast volumes '
+                          'detected in dwi image.')
+
+    # Next check number of unique b encodings (i.e. shells) and their indices
+    X = np.array(signal_means).reshape(-1, 1)
+    ms = MeanShift(bandwidth=estimate_bandwidth(X, quantile=0.1), bin_seeding=True)
+    ms.fit(X)
+    labs, idx = np.unique(ms.labels_, return_index=True)
+    ix_img = []
+    i = -1
+    for val in range(len(ms.labels_)):
+        if val in np.sort(idx[::-1]):
+            i = i + 1
+        ix_img.append(labs[i])
+    ix_img = np.array(ix_img)
+
+    ix_vec = []
+    i = -1
+    for val in range(len(bvals)):
+        if bvals[val] != bvals[val-1]:
+            i = i + 1
+        ix_vec.append(i)
+    ix_vec = np.array(ix_vec)
+
+    if len(ms.cluster_centers_) != len(np.unique(bvals)):
+        raise UserWarning('Number of unique b-values does not correspond to number of unique signal gradient '
+                          'encoding intensities in the dwi image.')
+
+    if np.all(np.isclose(ix_img, ix_vec)) is True:
+        raise UserWarning('Positions of b-value B0\'s and shell(s) do not correspond to signal intensity '
+                          'fluctuation patterns in the dwi image.')
+    return

From 99d008f3d7636eb31c44e5d4894d50b4e64c4045 Mon Sep 17 00:00:00 2001
From: oesteban
Date: Mon, 27 Apr 2020 16:48:40 -0700
Subject: [PATCH 2/4] enh: focus the PR on only one function, add test

Replaces: #29
---
 dmriprep/interfaces/vectors.py       |   4 +-
 dmriprep/utils/tests/test_vectors.py |  28 ++++++--
 dmriprep/utils/vectors.py            | 102 +++++++-------------------
 3 files changed, 48 insertions(+), 86 deletions(-)

diff --git a/dmriprep/interfaces/vectors.py b/dmriprep/interfaces/vectors.py
index 1ebb9f96..1329bd62 100644
--- a/dmriprep/interfaces/vectors.py
+++ b/dmriprep/interfaces/vectors.py
@@ -21,7 +21,6 @@ class _CheckGradientTableInputSpec(BaseInterfaceInputSpec):
     b0_threshold = traits.Float(B0_THRESHOLD, usedefault=True)
     bvec_norm_epsilon = traits.Float(BVEC_NORM_EPSILON, usedefault=True)
     b_scale = traits.Bool(True, usedefault=True)
-    image_consistency = traits.Bool(False, usedefault=True)


 class _CheckGradientTableOutputSpec(TraitedSpec):
@@ -76,9 +75,8 @@ def _run_interface(self, runtime):
             bvals=_undefined(self.inputs, "in_bval"),
             rasb_file=rasb_file,
             b_scale=self.inputs.b_scale,
-            image_consistency=self.inputs.image_consistency,
             bvec_norm_epsilon=self.inputs.bvec_norm_epsilon,
-            b0_threshold=self.inputs.b0_threshold
+            b0_threshold=self.inputs.b0_threshold,
         )
         pole = table.pole
         self._results["pole"] = tuple(pole)
diff --git a/dmriprep/utils/tests/test_vectors.py b/dmriprep/utils/tests/test_vectors.py
index 0e3e4aaf..9f020d32 100644
--- a/dmriprep/utils/tests/test_vectors.py
+++ b/dmriprep/utils/tests/test_vectors.py
@@ -1,6 +1,7 @@
 """Test vector utilities."""
 import pytest
 import numpy as np
+import nibabel as nb
 from dmriprep.utils import vectors as v
 from collections import namedtuple

@@ -86,11 +87,6 @@ def test_corruption(tmpdir, dipy_test_data, monkeypatch):
     assert -1.0 <= np.max(np.abs(dgt.gradients[..., :-1])) <= 1.0
     assert dgt.normalized is True

-    # Test image gradient consistency
-    dgt = v.DiffusionGradientTable(dwi_file=dipy_test_data['dwi_file'],
-                                   bvals=bvals, bvecs=bvecs)
-    assert dgt.gradient_consistency is None
-
     def mock_func(*args, **kwargs):
         return "called!"

@@ -105,3 +101,25 @@ def mock_func(*args, **kwargs):
     # Miscellaneous tests
     with pytest.raises(ValueError):
         dgt.to_filename("path", filetype="mrtrix")
+
+
+def test_b0mask_from_data(tmp_path):
+    """Check the estimation of bzeros using the dwi data."""
+
+    highb = np.random.normal(100, 50, size=(40, 40, 40, 99))
+    mask_file = tmp_path / 'mask.nii.gz'
+
+    # Test 1: no lowb
+    dwi_file = tmp_path / 'only_highb.nii.gz'
+    nb.Nifti1Image(highb.astype(float), np.eye(4), None).to_filename(dwi_file)
+    nb.Nifti1Image(np.ones((40, 40, 40), dtype=np.uint8),
+                   np.eye(4), None).to_filename(mask_file)
+
+    assert v.b0mask_from_data(dwi_file, mask_file).sum() == 0
+
+    # Test 1: one lowb
+    lowb = np.random.normal(400, 50, size=(40, 40, 40, 1))
+    dwi_file = tmp_path / 'dwi.nii.gz'
+    nb.Nifti1Image(np.concatenate((lowb, highb), axis=3).astype(float),
+                   np.eye(4), None).to_filename(dwi_file)
+    assert v.b0mask_from_data(dwi_file, mask_file).sum() == 1
diff --git a/dmriprep/utils/vectors.py b/dmriprep/utils/vectors.py
index b7992be7..e4d054d7 100644
--- a/dmriprep/utils/vectors.py
+++ b/dmriprep/utils/vectors.py
@@ -103,7 +103,6 @@ def __init__(

         if dwi_file is not None:
             self.affine = dwi_file
-            self._dwi_file = dwi_file
         if rasb_file is not None:
             self.gradients = rasb_file
         if self.affine is not None:
@@ -279,18 +278,6 @@ def to_filename(self, filename, filetype="rasb"):
         else:
             raise ValueError('Unknown filetype "%s"' % filetype)

-    # @property
-    # def gradient_consistency(self):
-    #     """
-    #     Check that b gradients and signal variation in dwi image are consistent with one another.
-    #     """
-    #     if (Path(self._dwi_file).exists() is True) and (self._image_consistency is True):
-    #         return image_gradient_consistency_check(str(self._dwi_file), self.bvecs, self.bvals,
-    #                                                 b0_threshold=self._b0_thres)
-    #     else:
-    #         raise FileNotFoundError(
-    #             "DWI file not found, and is required for checking image-gradient consistency.")
-

 def normalize_gradients(
     bvecs,
@@ -498,73 +485,32 @@ def bvecs2ras(affine, bvecs, norm=True, bvec_norm_epsilon=0.2):
     return rotated_bvecs


-def image_gradient_consistency_check(dwi_file, bvecs, bvals, b0_threshold=B0_THRESHOLD):
+def rasb_dwi_length_check(dwi_file, rasb_file):
+    """Check the number of encoding vectors and number of orientations in the DWI file."""
+    return nb.load(dwi_file).shape[-1] == len(np.loadtxt(rasb_file, skiprows=1))
+
+
+def b0mask_from_data(dwi_file, mask_file, z_thres=3.0):
     """
-    Check that gradient encoding patterns found in the b-values correspond to those
-    found in the signal intensity variance across volumes of the dwi image.
+    Evaluate B0 locations relative to mean signal variation.
+
+    Standardizes (z-score) the average DWI signal within mask and threshold.
+    This is a data-driven way of estimating which volumes in the DWI dataset are
+    really encoding *low-b* acquisitions.

     Parameters
     ----------
-    dwi_file : str
-        Optionally provide a file path to the diffusion-weighted image series to which the
-        bvecs/bvals should correspond.
-    bvecs : m x n 2d array
-        B-vectors array.
-    bvals : 1d array
-        B-values float array.
-    b0_threshold : float
-        Gradient threshold below which volumes and vectors are considered B0's.
+    dwi_file : :obj:`str`
+        File path to the diffusion-weighted image series.
+    mask_file : :obj:`str`
+        File path to a mask corresponding to the DWI file.
+    z_thres : :obj:`float`
+        The z-value to consider a volume as a *low-b* orientation.
+
     """
-    import nibabel as nib
-    from dipy.core.gradients import gradient_table_from_bvals_bvecs
-    from sklearn.cluster import MeanShift, estimate_bandwidth
-
-    # Build gradient table object
-    gtab = gradient_table_from_bvals_bvecs(bvals, bvecs, b0_threshold=b0_threshold)
-
-    # Check that number of image volumes corresponds to the number of gradient encodings.
-    volume_num = nib.load(dwi_file).shape[-1]
-    if not len(bvals) == volume_num:
-        raise Exception("Expected %d total image samples but found %d total gradient encoding values", volume_num,
-                        len(bvals))
-    # Evaluate B0 locations relative to mean signal variation.
-    data = np.array(nib.load(dwi_file).dataobj)
-    signal_means = []
-    for vol in range(data.shape[-1]):
-        signal_means.append(np.mean(data[:, :, :, vol]))
-    signal_b0_indices = np.where(signal_means > np.mean(signal_means)+3*np.std(signal_means))[0]
-
-    # Check B0 locations first
-    if not np.allclose(np.where(gtab.b0s_mask == True)[0], signal_b0_indices):
-        raise UserWarning('B0 indices in vectors do not correspond to relative high-signal contrast volumes '
-                          'detected in dwi image.')
-
-    # Next check number of unique b encodings (i.e. shells) and their indices
-    X = np.array(signal_means).reshape(-1, 1)
-    ms = MeanShift(bandwidth=estimate_bandwidth(X, quantile=0.1), bin_seeding=True)
-    ms.fit(X)
-    labs, idx = np.unique(ms.labels_, return_index=True)
-    ix_img = []
-    i = -1
-    for val in range(len(ms.labels_)):
-        if val in np.sort(idx[::-1]):
-            i = i + 1
-        ix_img.append(labs[i])
-    ix_img = np.array(ix_img)
-
-    ix_vec = []
-    i = -1
-    for val in range(len(bvals)):
-        if bvals[val] != bvals[val-1]:
-            i = i + 1
-        ix_vec.append(i)
-    ix_vec = np.array(ix_vec)
-
-    if len(ms.cluster_centers_) != len(np.unique(bvals)):
-        raise UserWarning('Number of unique b-values does not correspond to number of unique signal gradient '
-                          'encoding intensities in the dwi image.')
-
-    if np.all(np.isclose(ix_img, ix_vec)) is True:
-        raise UserWarning('Positions of b-value B0\'s and shell(s) do not correspond to signal intensity '
-                          'fluctuation patterns in the dwi image.')
-    return
+    data = np.asanyarray(nb.load(dwi_file).dataobj)
+    mask = np.asanyarray(nb.load(mask_file).dataobj) > 0.5
+    signal_means = np.median(data[mask, np.newaxis], axis=0)
+    zscored_means = signal_means - signal_means.mean()
+    zscored_means /= zscored_means.std()
+    return zscored_means > z_thres

From 7757d0f240a64bb6b9fa478736aaa1c458c35de3 Mon Sep 17 00:00:00 2001
From: oesteban
Date: Mon, 27 Apr 2020 17:22:38 -0700
Subject: [PATCH 3/4] fix(test): the high-b array had too large a scale; reducing it to preempt false positives

---
 dmriprep/utils/tests/test_vectors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dmriprep/utils/tests/test_vectors.py b/dmriprep/utils/tests/test_vectors.py
index 9f020d32..f1bdbd2c 100644
--- a/dmriprep/utils/tests/test_vectors.py
+++ b/dmriprep/utils/tests/test_vectors.py
@@ -106,7 +106,7 @@ def mock_func(*args, **kwargs):
 def test_b0mask_from_data(tmp_path):
     """Check the estimation of bzeros using the dwi data."""

-    highb = np.random.normal(100, 50, size=(40, 40, 40, 99))
+    highb = np.random.normal(100, 5, size=(40, 40, 40, 99))
     mask_file = tmp_path / 'mask.nii.gz'

     # Test 1: no lowb

From 71973b413d377a7d2dd891413f7d1cfdaf5c44fc Mon Sep 17 00:00:00 2001
From: oesteban
Date: Mon, 27 Apr 2020 17:32:54 -0700
Subject: [PATCH 4/4] enh: use median for a more robust calculation

---
 dmriprep/utils/vectors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dmriprep/utils/vectors.py b/dmriprep/utils/vectors.py
index e4d054d7..c29bf239 100644
--- a/dmriprep/utils/vectors.py
+++ b/dmriprep/utils/vectors.py
@@ -511,6 +511,6 @@ def b0mask_from_data(dwi_file, mask_file, z_thres=3.0):
     data = np.asanyarray(nb.load(dwi_file).dataobj)
     mask = np.asanyarray(nb.load(mask_file).dataobj) > 0.5
     signal_means = np.median(data[mask, np.newaxis], axis=0)
-    zscored_means = signal_means - signal_means.mean()
+    zscored_means = signal_means - np.median(signal_means)
     zscored_means /= zscored_means.std()
     return zscored_means > z_thres
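
The snippet below is a minimal usage sketch of the b0mask_from_data() helper introduced by this series; it is not part of the patches themselves. It assumes a dmriprep checkout with these patches applied, plus numpy and nibabel, and its synthetic signal levels (about 100 for high-b volumes, 400 for the single low-b volume) mirror those used in test_b0mask_from_data above.

# Usage sketch for b0mask_from_data() (not part of the patch series above).
# Assumes dmriprep with patches 2-4 applied; synthetic signal levels follow
# the values used in test_b0mask_from_data.
import tempfile
from pathlib import Path

import numpy as np
import nibabel as nb

from dmriprep.utils.vectors import b0mask_from_data

tmpdir = Path(tempfile.mkdtemp())

# One low-b volume with clearly higher in-mask signal, followed by 29 high-b volumes.
lowb = np.random.normal(400, 5, size=(20, 20, 20, 1))
highb = np.random.normal(100, 5, size=(20, 20, 20, 29))
dwi = np.concatenate((lowb, highb), axis=3).astype(float)

dwi_file = tmpdir / "dwi.nii.gz"
mask_file = tmpdir / "mask.nii.gz"
nb.Nifti1Image(dwi, np.eye(4), None).to_filename(dwi_file)
nb.Nifti1Image(np.ones(dwi.shape[:3], dtype=np.uint8),
               np.eye(4), None).to_filename(mask_file)

# Volumes whose median in-mask signal lies more than z_thres (std-scaled) units
# above the median of the series are flagged as low-b (b = 0) acquisitions.
b0_mask = b0mask_from_data(dwi_file, mask_file, z_thres=3.0)
print(b0_mask.sum())            # expected: 1 (only the first volume)
print(np.flatnonzero(b0_mask))  # expected: [0]

With the final patch applied, the centering uses np.median rather than the mean, which keeps a single extreme volume from dragging the reference level and producing false positives.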