Improve docstrings and remove redundant checks (fixes #734) (#808)

dask · Apr 15, 2021 · b2689d3 · b2689d3
1 parent d762673
commit b2689d3
Show file tree

Hide file tree

Showing 2 changed files with 26 additions and 29 deletions.
diff --git a/dask_ml/decomposition/incremental_pca.py b/dask_ml/decomposition/incremental_pca.py
@@ -128,11 +128,11 @@ def __init__(
         n_components=None,
         whiten=False,
         center=True,
-        copy=True,
         batch_size=None,
         svd_solver="auto",
         iterated_power=0,
         random_state=None,
+        copy=True,
     ):
         self.n_components = n_components
         self.whiten = whiten
@@ -144,7 +144,6 @@ def __init__(
         self.random_state = random_state
 
     def _check_params(self):
-        super()._check_params()
         if self.center is False:
             raise ValueError("IncrementalPCA with center=False is not supported.")
 

diff --git a/dask_ml/decomposition/pca.py b/dask_ml/decomposition/pca.py
@@ -51,7 +51,7 @@ class PCA(sklearn.decomposition.PCA):
         ignored
 
     whiten : bool, optional (default False)
-        When True (False by default) the `components_` vectors are multiplied
+        When True (False by default) the ``components_`` vectors are multiplied
         by the square root of n_samples and then divided by the singular values
         to ensure uncorrelated outputs with unit component-wise variances.
 
@@ -60,25 +60,10 @@ class PCA(sklearn.decomposition.PCA):
         improve the predictive accuracy of the downstream estimators by
         making their data respect some hard-wired assumptions.
 
-    center : bool, optional (default True)
-        When False (True by default), the underlying data gets centered at zero
-        by subtracting the mean of the data from the data itself.
-
-        PCA is performed on centered data due to its being a regression model,
-        without an intercept. As such, its pricipal components originate at the
-        origin of the transformed space.
-
-        `center` set to False may be employed when performing PCA on already
-        centered data.
-
-        Since centering is a required step as part of whitening, `center` set
-        to False and `whiten` set to True is a combination which may result in
-        unexpected behavior, if performed on not previously centered data.
-
     svd_solver : string {'auto', 'full', 'tsqr', 'randomized'}
         auto :
-            the solver is selected by a default policy based on `X.shape` and
-            `n_components`: if the input data is larger than 500x500 and the
+            the solver is selected by a default policy based on ``X.shape`` and
+            ``n_components``: if the input data is larger than 500x500 and the
             number of components to extract is lower than 80% of the smallest
             dimension of the data, then the more efficient 'randomized'
             method is enabled. Otherwise the exact full SVD is computed and
@@ -99,7 +84,22 @@ class PCA(sklearn.decomposition.PCA):
         If int, random_state is the seed used by the random number generator;
         If RandomState instance, random_state is the random number generator;
         If None, the random number generator is the RandomState instance used
-        by `da.random`. Used when ``svd_solver`` == 'randomized'.
+        by ``da.random``. Used when ``svd_solver`` == 'randomized'.
+
+    center : bool, optional (default True)
+        When True (the default), the underlying data gets centered at zero
+        by subtracting the mean of the data from the data itself.
+
+        PCA is performed on centered data because it is a regression model
+        without an intercept. As such, its principal components originate at the
+        origin of the transformed space.
+
+        ``center=False`` may be employed when performing PCA on already
+        centered data.
+
+        Since centering is a required step of whitening, combining
+        ``center=False`` with ``whiten=True`` may result in unexpected
+        behavior if the data have not been centered beforehand.
 
     Attributes
     ----------
@@ -128,7 +128,7 @@ class PCA(sklearn.decomposition.PCA):
     mean_ : array, shape (n_features,)
         Per-feature empirical mean, estimated from the training set.
 
-        Equal to `X.mean(axis=0)`.
+        Equal to ``X.mean(axis=0)``.
 
     n_components_ : int
         The estimated number of components. When n_components is set
@@ -197,20 +197,22 @@ class PCA(sklearn.decomposition.PCA):
       ``dask.linalg.svd_compressed``.
     * n_components : ``n_components='mle'`` is not allowed.
       Fractional ``n_components`` between 0 and 1 is not allowed.
-    * center : defaults to ``True`` and enables control over whether centering
-      gets implicitly performed as part of the PCA model steps.
+    * center : if ``True`` (the default), automatically center input data before
+      performing PCA.
+      Set this parameter to ``False`` if the input data have already been
+      centered before running ``fit()``.
     """
 
     def __init__(
         self,
         n_components=None,
         copy=True,
         whiten=False,
-        center=True,
         svd_solver="auto",
         tol=0.0,
         iterated_power=0,
         random_state=None,
+        center=True,
     ):
         self.n_components = n_components
         self.copy = copy
@@ -221,14 +223,10 @@ def __init__(
         self.iterated_power = iterated_power
         self.random_state = random_state
 
-    def _check_params(self):
-        pass
-
     def fit(self, X, y=None):
         if not dask.is_dask_collection(X):
             raise TypeError(_TYPE_MSG.format(type(X)))
 
-        self._check_params()
         self._fit(X)
         self.n_features_in_ = X.shape[1]
         return self