55
66import numpy as np
77
8- from sklearn .utils .extmath import row_norms
98from sklearn .externals .six .moves import xrange
109
1110from .base import BaseClassifier , BaseRegressor
1211from .dataset_fast import get_dataset
13- from .sag_fast import _sag_fit
12+ from .sag_fast import _sag_fit , get_auto_step_size
1413
1514from .sgd_fast import ModifiedHuber
1615from .sgd_fast import SmoothHinge
2019from .sag_fast import L1Penalty
2120
2221
def get_auto_step_size(X, alpha, loss, gamma=None, sample_weight=None):
    """Compute an automatic step size for the SAG/SAGA solver.

    The step size is the inverse of an upper bound on the Lipschitz
    constant of the gradient of the objective:

        minimize_w 1 / n_samples * \\sum_i loss(w^T x_i, y_i)
                   + alpha * 0.5 * ||w||^2_2

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Array of samples x_i.
    alpha : float
        Constant that multiplies the l2 penalty term.
    loss : string, in {"log", "squared", "modified_huber", "smooth_hinge", \
            "squared_hinge"}
        The loss function used in the SAG solver.
    gamma : float, optional
        Smoothing parameter of the smooth hinge loss. Only used (and
        required) when ``loss == "smooth_hinge"``.
    sample_weight : ndarray of shape (n_samples,), optional
        Per-sample weights. When given, each squared row norm is scaled
        by the corresponding weight before taking the maximum.

    Returns
    -------
    step_size : float
        Step size used in the SAG/SAGA solver.

    Raises
    ------
    ValueError
        If ``loss`` is not one of the supported loss names.
    """
    if sample_weight is None:
        weighted_norms = row_norms(X, squared=True)
    else:
        weighted_norms = sample_weight * row_norms(X, squared=True)
    # L bounds the per-sample data term of the Lipschitz constant.
    L = np.max(weighted_norms)

    if loss == 'log':
        # Lipschitz constant for log loss: second derivative <= 1/4.
        lipschitz_constant = 0.25 * L + alpha
    elif loss == 'squared':
        lipschitz_constant = L + alpha
    elif loss == 'modified_huber':
        lipschitz_constant = 2 * L + alpha
    elif loss == 'smooth_hinge':
        # gamma is the smoothing parameter of the smooth hinge loss.
        lipschitz_constant = L + gamma + alpha
    elif loss == 'squared_hinge':
        lipschitz_constant = 2 * L + alpha
    else:
        # Fixed: the old message claimed only `squared`/`log` were
        # supported, contradicting the branches above.
        raise ValueError("`auto` stepsize is only available for `squared`, "
                         "`log`, `modified_huber`, `smooth_hinge` or "
                         "`squared_hinge` losses (got `%s` loss). Please "
                         "specify a stepsize." % loss)
    return 1.0 / lipschitz_constant
65-
66-
6722class _BaseSAG (object ):
6823
6924 def _get_loss (self ):
@@ -92,9 +47,10 @@ def _finalize_coef(self):
9247 self .coef_scale_ .fill (1.0 )
9348
9449 def _fit (self , X , Y , sample_weight ):
95- n_samples , n_features = X .shape
9650 rng = self ._get_random_state ()
9751 adaptive_step_size = False
52+ ds = get_dataset (X , order = "c" )
53+ n_samples , n_features = ds .get_n_samples (), ds .get_n_features ()
9854
9955 if sample_weight is None :
10056 sample_weight = np .ones (n_samples , dtype = np .float64 )
@@ -103,7 +59,7 @@ def _fit(self, X, Y, sample_weight):
10359
10460 if self .eta is None or self .eta in ('auto' , 'line-search' ):
10561 step_size = get_auto_step_size (
106- X , self .alpha , self .loss , self .gamma , sample_weight = sample_weight )
62+ ds , self .alpha , self .loss , self .gamma , sample_weight = sample_weight )
10763 if self .verbose > 0 :
10864 print ("Auto stepsize: %s" % self .eta )
10965 if self .eta == 'line-search' :
@@ -116,7 +72,6 @@ def _fit(self, X, Y, sample_weight):
11672 penalty = self ._get_penalty ()
11773 n_vectors = Y .shape [1 ]
11874 n_inner = int (self .n_inner * n_samples )
119- ds = get_dataset (X , order = "c" )
12075
12176 self .coef_ = np .zeros ((n_vectors , n_features ), dtype = np .float64 )
12277 self .coef_scale_ = np .ones (n_vectors , dtype = np .float64 )
@@ -190,6 +145,17 @@ def __init__(self, eta='auto', alpha=1.0, beta=0.0, loss="smooth_hinge",
190145 self .is_saga = False
191146
192147 def fit (self , X , y , sample_weight = None ):
148+ """
149+ Parameters
150+ ----------
151+ X : numpy array, sparse matrix or RowDataset of size (n_samples, n_features)
152+ y : numpy array of size (n_samples,)
153+ sample_weight : numpy array of size (n_samples,), optional
154+
155+ Returns
156+ -------
157+ self
158+ """
193159 if not self .is_saga and self .penalty is not None :
194160 raise ValueError ('Penalties in SAGClassifier. Please use '
195161 'SAGAClassifier instead.'
@@ -306,6 +272,17 @@ def __init__(self, eta='auto', alpha=1.0, beta=0.0, loss="smooth_hinge",
306272 self .is_saga = False
307273
308274 def fit (self , X , y , sample_weight = None ):
275+ """
276+ Parameters
277+ ----------
278+ X : numpy array, sparse matrix or RowDataset of size (n_samples, n_features)
279+ y : numpy array of size (n_samples,)
280+ sample_weight : numpy array of size (n_samples,), optional
281+
282+ Returns
283+ -------
284+ self
285+ """
309286 if not self .is_saga and self .penalty is not None :
310287 raise ValueError ('Penalties are not supported in SAGRegressor. '
311288 'Please use SAGARegressor instead.' )
0 commit comments