
Commit f8f6c3d

Merge pull request #94 from fabianp/sag_dataset

ENH: allow SAG* objects to take a RowDataset as argument.

2 parents b6bc773 + 8de52a4, commit f8f6c3d
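The merged change lets the SAG* estimators consume a pre-built RowDataset, so the dataset wrapper can be constructed once and shared across fits instead of being rebuilt from X on every call. A minimal usage sketch, assuming this tree's import paths (lightning.impl.dataset_fast, lightning.impl.sag) and the constructor defaults visible in the diff below:

    import numpy as np

    from lightning.impl.dataset_fast import get_dataset
    from lightning.impl.sag import SAGClassifier

    rng = np.random.RandomState(0)
    X = rng.randn(100, 5)
    y = np.sign(X[:, 0])                  # toy binary target

    ds = get_dataset(X, order="c")        # build the RowDataset once
    clf = SAGClassifier(eta='auto', alpha=1.0)
    clf.fit(ds, y)                        # fit() now accepts the dataset directly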

File tree: 4 files changed (+15610, -585 lines)


lightning/impl/sag.py: 26 additions & 49 deletions
@@ -5,12 +5,11 @@
 
 import numpy as np
 
-from sklearn.utils.extmath import row_norms
 from sklearn.externals.six.moves import xrange
 
 from .base import BaseClassifier, BaseRegressor
 from .dataset_fast import get_dataset
-from .sag_fast import _sag_fit
+from .sag_fast import _sag_fit, get_auto_step_size
 
 from .sgd_fast import ModifiedHuber
 from .sgd_fast import SmoothHinge
@@ -20,50 +19,6 @@
 from .sag_fast import L1Penalty
 
 
-def get_auto_step_size(X, alpha, loss, gamma=None, sample_weight=None):
-    """Compute automatic step size for SAG solver
-    Stepsize computed using the following objective:
-        minimize_w 1 / n_samples * \sum_i loss(w^T x_i, y_i)
-                   + alpha * 0.5 * ||w||^2_2
-    Parameters
-    ----------
-    X : ndarray
-        Array of samples x_i.
-    alpha : float
-        Constant that multiplies the l2 penalty term.
-    loss : string, in {"log", "squared"}
-        The loss function used in SAG solver.
-
-    Returns
-    -------
-    step_size : float
-        Step size used in SAG/SAGA solver.
-    """
-    if sample_weight is None:
-        weighted_norms = row_norms(X, squared=True)
-    else:
-        weighted_norms = sample_weight * row_norms(X, squared=True)
-    L = np.max(weighted_norms)
-    n_samples = X.shape[0]
-
-    if loss == 'log':
-        # inverse Lipschitz constant for log loss
-        lipschitz_constant = 0.25 * L + alpha
-    elif loss == 'squared':
-        lipschitz_constant = L + alpha
-    elif loss == 'modified_huber':
-        lipschitz_constant = 2 * L + alpha
-    elif loss == 'smooth_hinge':
-        lipschitz_constant = L + gamma + alpha
-    elif loss == 'squared_hinge':
-        lipschitz_constant = 2 * L + alpha
-    else:
-        raise ValueError("`auto` stepsize is only available for `squared` or "
-                         "`log` losses (got `%s` loss). Please specify a "
-                         "stepsize." % loss)
-    return 1.0 / lipschitz_constant
-
-
 class _BaseSAG(object):
 
     def _get_loss(self):
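For reference, the removed helper returns the inverse of a per-loss Lipschitz estimate, where L is the largest (sample-weighted) squared row norm of X. A worked numeric sketch of that same logic on a toy matrix; after this commit the computation lives in the Cython module sag_fast and takes the dataset object instead of X:

    import numpy as np
    from sklearn.utils.extmath import row_norms

    X = np.array([[1.0, 2.0],
                  [3.0, 4.0]])
    alpha = 0.01

    L = np.max(row_norms(X, squared=True))  # largest squared row norm: 3**2 + 4**2 = 25
    step_size = 1.0 / (0.25 * L + alpha)    # log-loss branch of the chain above
    print(step_size)                        # 1 / 6.26, roughly 0.1597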
@@ -92,9 +47,10 @@ def _finalize_coef(self):
         self.coef_scale_.fill(1.0)
 
     def _fit(self, X, Y, sample_weight):
-        n_samples, n_features = X.shape
         rng = self._get_random_state()
         adaptive_step_size = False
+        ds = get_dataset(X, order="c")
+        n_samples, n_features = ds.get_n_samples(), ds.get_n_features()
 
         if sample_weight is None:
             sample_weight = np.ones(n_samples, dtype=np.float64)
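With this hunk, the dataset is built once at the top of _fit and the problem dimensions are read through the dataset's accessors instead of X.shape, which is what allows a caller-supplied RowDataset to pass through untouched. A small sketch of those accessors, assuming get_dataset wraps both C-ordered dense arrays and scipy.sparse matrices as elsewhere in this package:

    import numpy as np
    import scipy.sparse as sp

    from lightning.impl.dataset_fast import get_dataset

    X_dense = np.ones((10, 3))
    X_sparse = sp.csr_matrix(X_dense)

    for X in (X_dense, X_sparse):
        ds = get_dataset(X, order="c")
        print(ds.get_n_samples(), ds.get_n_features())  # 10 3, for both inputs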
@@ -103,7 +59,7 @@ def _fit(self, X, Y, sample_weight):
 
         if self.eta is None or self.eta in ('auto', 'line-search'):
             step_size = get_auto_step_size(
-                X, self.alpha, self.loss, self.gamma, sample_weight=sample_weight)
+                ds, self.alpha, self.loss, self.gamma, sample_weight=sample_weight)
             if self.verbose > 0:
                 print("Auto stepsize: %s" % self.eta)
             if self.eta == 'line-search':
@@ -116,7 +72,6 @@ def _fit(self, X, Y, sample_weight):
         penalty = self._get_penalty()
         n_vectors = Y.shape[1]
         n_inner = int(self.n_inner * n_samples)
-        ds = get_dataset(X, order="c")
 
         self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64)
         self.coef_scale_ = np.ones(n_vectors, dtype=np.float64)
@@ -190,6 +145,17 @@ def __init__(self, eta='auto', alpha=1.0, beta=0.0, loss="smooth_hinge",
         self.is_saga = False
 
     def fit(self, X, y, sample_weight=None):
+        """
+        Parameters
+        ----------
+        X : numpy array, sparse matrix or RowDataset of size (n_samples, n_features)
+        y : numpy array of size (n_samples,)
+        sample_weight : numpy array of size (n_samples,), optional
+
+        Returns
+        -------
+        self
+        """
         if not self.is_saga and self.penalty is not None:
             raise ValueError('Penalties are not supported in SAGClassifier. '
                              'Please use SAGAClassifier instead.')
@@ -306,6 +272,17 @@ def __init__(self, eta='auto', alpha=1.0, beta=0.0, loss="smooth_hinge",
         self.is_saga = False
 
     def fit(self, X, y, sample_weight=None):
+        """
+        Parameters
+        ----------
+        X : numpy array, sparse matrix or RowDataset of size (n_samples, n_features)
+        y : numpy array of size (n_samples,)
+        sample_weight : numpy array of size (n_samples,), optional
+
+        Returns
+        -------
+        self
+        """
         if not self.is_saga and self.penalty is not None:
             raise ValueError('Penalties are not supported in SAGRegressor. '
                              'Please use SAGARegressor instead.')
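As the check above shows, the plain SAG estimators reject any penalty and point users to their SAGA counterparts. A hedged sketch of that split; the SAGARegressor parameters are an assumption modeled on the eta/alpha/beta constructor arguments and the L1Penalty import visible in this file:

    from lightning.impl.sag import SAGRegressor, SAGARegressor

    reg = SAGRegressor(eta='auto', alpha=0.1)               # no penalty: plain SAG
    saga = SAGARegressor(eta='auto', alpha=0.1, beta=0.01,  # SAGA accepts a penalty,
                         penalty='l1')                      # e.g. L1 (assumed name)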
