From fd1b2c3f0ee03cfd509f3f7af56b86d9d80df89f Mon Sep 17 00:00:00 2001
From: Scott Sievert
Date: Wed, 14 Mar 2018 22:51:31 -0500
Subject: [PATCH] BUG: dataframe size info not exact and indexing needed

---
 dask_glm/algorithms.py | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/dask_glm/algorithms.py b/dask_glm/algorithms.py
index 9052c57..fe58437 100644
--- a/dask_glm/algorithms.py
+++ b/dask_glm/algorithms.py
@@ -145,7 +145,7 @@ def _choose_step_sgd(initial, k):
 
 @normalize
 def sgd(X, y, max_iter=1e3, tol=1e-8, family=Logistic, batch_size=64,
-        initial_step=10.0, n=None, **kwargs):
+        initial_step=1.0, **kwargs):
     """Stochastic Gradient Descent.
 
     Parameters
@@ -164,34 +164,33 @@ def sgd(X, y, max_iter=1e3, tol=1e-8, family=Logistic, batch_size=64,
     initial_step : float
         Initial step size used in the optimization. The step size decays like
         initial_step/(1 + iter_count).
-    n : int
-        The number of examples, or the first dimension of the matrix X.
-        This argument will only be used if X.shape[1] is NaN.
     family : Family
 
     Returns
     -------
     beta : array-like, shape (n_features,)
     """
-    gradient, hessian = family.gradient, family.hessian
-    n_examples, p = X.shape
-    if not np.isnan(n_examples):
-        n = n_examples
-    if n is None:
-        raise ValueError('Pass number of examples in with kwarg `n`')
-    beta = np.zeros(p)  # always init to zeros?
+    gradient = family.gradient
+    n, p = X.shape
+    if np.isnan(n):
+        raise ValueError('SGD needs shape information to allow indexing. '
+                         'Pass a computed array (e.g. `X.compute()` or '
+                         '`X.values.compute()`) wrapped with '
+                         '`dask.array.from_array`.')
+
+    beta = np.zeros(p)
    iter_count = 0
     converged = False
 
     while not converged:
-        beta_old = beta
+        beta_old = beta.copy()
         iter_count += 1
 
         i = np.random.choice(n, size=(batch_size,))
         Xbeta = dot(X[i], beta)
-        grad = gradient(Xbeta, X[i], y[i])
-        (grad,) = compute((grad,))
+        grad = gradient(Xbeta, X[i], y[i]).compute()
 
         beta -= _choose_step_sgd(initial_step, iter_count) * grad / batch_size
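
Note for reviewers: a minimal sketch of the failure mode this patch guards against. A dask array built from a dask DataFrame has an unknown row count (`X.shape[0]` is NaN), so the random row indexing in `sgd` cannot work; the new error message suggests computing the values and rewrapping them with `dask.array.from_array`. The DataFrame contents, partition count, and chunk sizes below are illustrative assumptions, not part of the patch.

```python
import numpy as np
import pandas as pd
import dask.array as da
import dask.dataframe as dd

# A dask DataFrame only knows its row count lazily, so the dask-array
# view of it reports NaN as its first dimension.
df = dd.from_pandas(pd.DataFrame(np.random.rand(100, 3)), npartitions=4)
X = df.values
print(X.shape)  # (nan, 3): np.isnan(X.shape[0]) is True, so sgd() raises

# Workaround from the error message: materialize the values, then wrap
# them back into a dask array with concrete chunk sizes.
X = da.from_array(df.values.compute(), chunks=(25, 3))
print(X.shape)  # (100, 3): random row indexing like X[i] now works
```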
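The docstring states the step size decays like `initial_step/(1 + iter_count)`. The body of `_choose_step_sgd` is not shown in this hunk, so the implementation below is an assumption reconstructed from that docstring, shown only to illustrate the schedule.

```python
def _choose_step_sgd(initial, k):
    # Assumed from the docstring: step decays like initial / (1 + k).
    return initial / (1 + k)

# With the new default initial_step=1.0, the per-iteration step shrinks
# harmonically: early updates stay large, late updates become small.
for k in (1, 10, 100):
    print(k, _choose_step_sgd(1.0, k))
# 1 0.5
# 10 0.09090909090909091
# 100 0.009900990099009901
```

The update also divides by `batch_size`, which turns the minibatch gradient into a per-example average (assuming `family.gradient` sums over rows), so a given `initial_step` behaves comparably across batch sizes.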